2005-04-17 02:20:36 +04:00
/*
* NET An implementation of the SOCKET network access protocol .
*
* Version : @ ( # ) socket . c 1.1 .93 18 / 02 / 95
*
* Authors : Orest Zborowski , < obz @ Kodak . COM >
2005-05-06 03:16:16 +04:00
* Ross Biro
2005-04-17 02:20:36 +04:00
* Fred N . van Kempen , < waltje @ uWalt . NL . Mugnet . ORG >
*
* Fixes :
* Anonymous : NOTSOCK / BADF cleanup . Error fix in
* shutdown ( )
* Alan Cox : verify_area ( ) fixes
* Alan Cox : Removed DDI
* Jonathan Kamens : SOCK_DGRAM reconnect bug
* Alan Cox : Moved a load of checks to the very
* top level .
* Alan Cox : Move address structures to / from user
* mode above the protocol layers .
* Rob Janssen : Allow 0 length sends .
* Alan Cox : Asynchronous I / O support ( cribbed from the
* tty drivers ) .
* Niibe Yutaka : Asynchronous I / O for writes ( 4.4 BSD style )
* Jeff Uphoff : Made max number of sockets command - line
* configurable .
* Matti Aarnio : Made the number of sockets dynamic ,
* to be allocated when needed , and mr .
* Uphoff ' s max is used as max to be
* allowed to allocate .
* Linus : Argh . removed all the socket allocation
* altogether : it ' s in the inode now .
* Alan Cox : Made sock_alloc ( ) / sock_release ( ) public
* for NetROM and future kernel nfsd type
* stuff .
* Alan Cox : sendmsg / recvmsg basics .
* Tom Dyas : Export net symbols .
* Marcin Dalecki : Fixed problems with CONFIG_NET = " n " .
* Alan Cox : Added thread locking to sys_ * calls
* for sockets . May have errors at the
* moment .
* Kevin Buhr : Fixed the dumb errors in the above .
* Andi Kleen : Some small cleanups , optimizations ,
* and fixed a copy_from_user ( ) bug .
* Tigran Aivazian : sys_send ( args ) calls sys_sendto ( args , NULL , 0 )
2006-09-01 11:19:31 +04:00
* Tigran Aivazian : Made listen ( 2 ) backlog sanity checks
2005-04-17 02:20:36 +04:00
* protocol - independent
*
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*
*
* This module is effectively the top level interface to the BSD socket
2006-09-01 11:19:31 +04:00
* paradigm .
2005-04-17 02:20:36 +04:00
*
* Based upon Swansea University Computer Society NET3 .039
*/
# include <linux/mm.h>
# include <linux/socket.h>
# include <linux/file.h>
# include <linux/net.h>
# include <linux/interrupt.h>
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
static void
handler (int s)
{
}
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
# include <linux/thread_info.h>
2006-09-01 11:23:39 +04:00
# include <linux/rcupdate.h>
2005-04-17 02:20:36 +04:00
# include <linux/netdevice.h>
# include <linux/proc_fs.h>
# include <linux/seq_file.h>
2006-03-21 09:33:17 +03:00
# include <linux/mutex.h>
2005-04-17 02:20:36 +04:00
# include <linux/wanrouter.h>
# include <linux/if_bridge.h>
2005-08-16 09:18:02 +04:00
# include <linux/if_frad.h>
# include <linux/if_vlan.h>
2005-04-17 02:20:36 +04:00
# include <linux/init.h>
# include <linux/poll.h>
# include <linux/cache.h>
# include <linux/module.h>
# include <linux/highmem.h>
# include <linux/mount.h>
# include <linux/security.h>
# include <linux/syscalls.h>
# include <linux/compat.h>
# include <linux/kmod.h>
2005-05-17 15:08:48 +04:00
# include <linux/audit.h>
2006-01-21 02:46:55 +03:00
# include <linux/wireless.h>
2007-10-09 10:24:22 +04:00
# include <linux/nsproxy.h>
2009-09-23 03:43:33 +04:00
# include <linux/magic.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 11:04:11 +03:00
# include <linux/slab.h>
2005-04-17 02:20:36 +04:00
# include <asm/uaccess.h>
# include <asm/unistd.h>
# include <net/compat.h>
2008-06-03 20:14:03 +04:00
# include <net/wext.h>
2005-04-17 02:20:36 +04:00
# include <net/sock.h>
# include <linux/netfilter.h>
2009-11-07 10:10:54 +03:00
# include <linux/if_tun.h>
# include <linux/ipv6_route.h>
# include <linux/route.h>
# include <linux/sockios.h>
# include <linux/atalk.h>
2005-04-17 02:20:36 +04:00
/*
* Forward declarations of the static file-operation handlers that the
* socket_file_ops table refers to .
*/
static int sock_no_open ( struct inode * irrelevant , struct file * dontcare ) ;
2006-10-01 10:28:46 +04:00
static ssize_t sock_aio_read ( struct kiocb * iocb , const struct iovec * iov ,
unsigned long nr_segs , loff_t pos ) ;
static ssize_t sock_aio_write ( struct kiocb * iocb , const struct iovec * iov ,
unsigned long nr_segs , loff_t pos ) ;
2006-09-01 11:19:31 +04:00
static int sock_mmap ( struct file * file , struct vm_area_struct * vma ) ;
2005-04-17 02:20:36 +04:00
static int sock_close ( struct inode * inode , struct file * file ) ;
static unsigned int sock_poll ( struct file * file ,
struct poll_table_struct * wait ) ;
2006-09-01 11:19:31 +04:00
static long sock_ioctl ( struct file * file , unsigned int cmd , unsigned long arg ) ;
2006-03-22 10:58:08 +03:00
# ifdef CONFIG_COMPAT
static long compat_sock_ioctl ( struct file * file ,
2006-09-01 11:19:31 +04:00
unsigned int cmd , unsigned long arg ) ;
2006-03-22 10:58:08 +03:00
# endif
2005-04-17 02:20:36 +04:00
static int sock_fasync ( int fd , struct file * filp , int on ) ;
static ssize_t sock_sendpage ( struct file * file , struct page * page ,
int offset , size_t size , loff_t * ppos , int more ) ;
2007-11-07 10:30:13 +03:00
static ssize_t sock_splice_read ( struct file * file , loff_t * ppos ,
struct pipe_inode_info * pipe , size_t len ,
unsigned int flags ) ;
2005-04-17 02:20:36 +04:00
/*
* Socket files have a set of ' special ' operations as well as the generic file ones . These don ' t appear
* in the operation structures but are done directly via the socketcall ( ) multiplexor .
*/
2007-02-12 11:55:36 +03:00
/*
* File - operations table for socket files . Most hooks point at the
* sock_ * handlers declared earlier in this file ; llseek and splice_write
* use the no_llseek and generic_splice_sendpage helpers instead , and
* compat_ioctl is only filled in when CONFIG_COMPAT is defined .
*/
static const struct file_operations socket_file_ops = {
2005-04-17 02:20:36 +04:00
. owner = THIS_MODULE ,
. llseek = no_llseek ,
. aio_read = sock_aio_read ,
. aio_write = sock_aio_write ,
. poll = sock_poll ,
. unlocked_ioctl = sock_ioctl ,
2006-03-22 10:58:08 +03:00
# ifdef CONFIG_COMPAT
. compat_ioctl = compat_sock_ioctl ,
# endif
2005-04-17 02:20:36 +04:00
. mmap = sock_mmap ,
. open = sock_no_open , /* special open code to disallow open via /proc */
. release = sock_close ,
. fasync = sock_fasync ,
2006-03-30 17:15:30 +04:00
. sendpage = sock_sendpage ,
. splice_write = generic_splice_sendpage ,
2007-11-07 10:30:13 +03:00
. splice_read = sock_splice_read ,
2005-04-17 02:20:36 +04:00
} ;
/*
* The protocol list . Each protocol is registered in here .
* net_families has one slot per protocol family ( NPROTO entries ) .
* NOTE ( review ) : net_family_lock presumably serialises updates to the
* table - the code that takes it is not visible here , confirm there .
*/
static DEFINE_SPINLOCK ( net_family_lock ) ;
2006-08-10 08:03:17 +04:00
static const struct net_proto_family * net_families [ NPROTO ] __read_mostly ;
2005-04-17 02:20:36 +04:00
/*
* Statistics counters of the socket lists :
* a per - CPU int named sockets_in_use , initialised to 0 .
*/
static DEFINE_PER_CPU ( int , sockets_in_use ) = 0 ;
/*
2006-09-01 11:19:31 +04:00
* Support routines .
* Move socket addresses back and forth across the kernel / user
* divide and look after the messy bits .
2005-04-17 02:20:36 +04:00
*/
2006-09-01 11:19:31 +04:00
# define MAX_SOCK_ADDR 128 / * 108 for Unix domain -
2005-04-17 02:20:36 +04:00
16 for IP , 16 for IPX ,
24 for IPv6 ,
2006-09-01 11:19:31 +04:00
about 80 for AX .25
2005-04-17 02:20:36 +04:00
must be at least one bigger than
the AF_UNIX size ( see net / unix / af_unix . c
2006-09-01 11:19:31 +04:00
: unix_mkname ( ) ) .
2005-04-17 02:20:36 +04:00
*/
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
/**
* move_addr_to_kernel - copy a socket address into kernel space
* @ uaddr : Address in user space
* @ kaddr : Address in kernel space
* @ ulen : Length in user space
*
* The address is copied into kernel space . If the provided address is
* too long an error code of - EINVAL is returned . If the copy gives
* invalid addresses - EFAULT is returned . On a success 0 is returned .
*/
2008-07-20 09:35:47 +04:00
int move_addr_to_kernel ( void __user * uaddr , int ulen , struct sockaddr * kaddr )
2005-04-17 02:20:36 +04:00
{
2008-07-20 09:35:47 +04:00
if ( ulen < 0 | | ulen > sizeof ( struct sockaddr_storage ) )
2005-04-17 02:20:36 +04:00
return - EINVAL ;
2006-09-01 11:19:31 +04:00
if ( ulen = = 0 )
2005-04-17 02:20:36 +04:00
return 0 ;
2006-09-01 11:19:31 +04:00
if ( copy_from_user ( kaddr , uaddr , ulen ) )
2005-04-17 02:20:36 +04:00
return - EFAULT ;
2005-05-17 15:08:48 +04:00
return audit_sockaddr ( ulen , kaddr ) ;
2005-04-17 02:20:36 +04:00
}
/**
* move_addr_to_user - copy an address to user space
* @ kaddr : kernel space address
* @ klen : length of address in kernel
* @ uaddr : user space address
* @ ulen : pointer to user length field
*
* The value pointed to by ulen on entry is the buffer length available .
* This is overwritten with the buffer space used . - EINVAL is returned
* if an overlong buffer is specified or a negative buffer size . - EFAULT
* is returned if either the buffer or the length field are not
* accessible .
* After copying the data up to the limit the user specifies , the true
* length of the data is written over the length limit the user
* specified . Zero is returned for a success .
*/
2006-09-01 11:19:31 +04:00
2008-07-20 09:35:47 +04:00
int move_addr_to_user ( struct sockaddr * kaddr , int klen , void __user * uaddr ,
2006-09-01 11:19:31 +04:00
int __user * ulen )
2005-04-17 02:20:36 +04:00
{
int err ;
int len ;
2006-09-01 11:19:31 +04:00
err = get_user ( len , ulen ) ;
if ( err )
2005-04-17 02:20:36 +04:00
return err ;
2006-09-01 11:19:31 +04:00
if ( len > klen )
len = klen ;
2008-07-20 09:35:47 +04:00
if ( len < 0 | | len > sizeof ( struct sockaddr_storage ) )
2005-04-17 02:20:36 +04:00
return - EINVAL ;
2006-09-01 11:19:31 +04:00
if ( len ) {
2006-03-30 21:20:22 +04:00
if ( audit_sockaddr ( klen , kaddr ) )
return - ENOMEM ;
2006-09-01 11:19:31 +04:00
if ( copy_to_user ( uaddr , kaddr , len ) )
2005-04-17 02:20:36 +04:00
return - EFAULT ;
}
/*
2006-09-01 11:19:31 +04:00
* " fromlen shall refer to the value before truncation.. "
* 1003.1 g
2005-04-17 02:20:36 +04:00
*/
return __put_user ( klen , ulen ) ;
}
2006-12-07 07:33:20 +03:00
static struct kmem_cache * sock_inode_cachep __read_mostly ;
2005-04-17 02:20:36 +04:00
static struct inode * sock_alloc_inode ( struct super_block * sb )
{
struct socket_alloc * ei ;
2006-09-01 11:19:31 +04:00
2006-12-07 07:33:17 +03:00
ei = kmem_cache_alloc ( sock_inode_cachep , GFP_KERNEL ) ;
2005-04-17 02:20:36 +04:00
if ( ! ei )
return NULL ;
init_waitqueue_head ( & ei - > socket . wait ) ;
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
ei - > socket . fasync_list = NULL ;
ei - > socket . state = SS_UNCONNECTED ;
ei - > socket . flags = 0 ;
ei - > socket . ops = NULL ;
ei - > socket . sk = NULL ;
ei - > socket . file = NULL ;
return & ei - > vfs_inode ;
}
/*
 * Return the socket_alloc that embeds @inode to sock_inode_cachep.
 */
static void sock_destroy_inode(struct inode *inode)
{
	struct socket_alloc *sa;

	sa = container_of(inode, struct socket_alloc, vfs_inode);
	kmem_cache_free(sock_inode_cachep, sa);
}
2008-07-26 06:45:34 +04:00
static void init_once ( void * foo )
2005-04-17 02:20:36 +04:00
{
2006-09-01 11:19:31 +04:00
struct socket_alloc * ei = ( struct socket_alloc * ) foo ;
2005-04-17 02:20:36 +04:00
2007-05-17 09:10:57 +04:00
inode_init_once ( & ei - > vfs_inode ) ;
2005-04-17 02:20:36 +04:00
}
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
/*
* Create the slab cache that sock_alloc_inode ( ) allocates from .
* Objects are sizeof ( struct socket_alloc ) bytes and init_once is passed
* as the final kmem_cache_create ( ) argument .
* Returns 0 on success , - ENOMEM if the cache could not be created .
*/
static int init_inodecache ( void )
{
sock_inode_cachep = kmem_cache_create ( " sock_inode_cache " ,
2006-09-01 11:19:31 +04:00
sizeof ( struct socket_alloc ) ,
0 ,
( SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD ) ,
2007-07-20 05:11:58 +04:00
init_once ) ;
2005-04-17 02:20:36 +04:00
if ( sock_inode_cachep = = NULL )
return - ENOMEM ;
return 0 ;
}
2009-09-22 04:01:09 +04:00
/*
* Superblock operations passed to get_sb_pseudo ( ) by sockfs_get_sb ( ) :
* inodes come from sock_alloc_inode ( ) and go back through
* sock_destroy_inode ( ) ; statfs is the generic simple_statfs .
*/
static const struct super_operations sockfs_ops = {
2005-04-17 02:20:36 +04:00
. alloc_inode = sock_alloc_inode ,
. destroy_inode = sock_destroy_inode ,
. statfs = simple_statfs ,
} ;
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 13:02:57 +04:00
/*
* get_sb callback of sock_fs_type . It forwards fs_type and mnt to
* get_sb_pseudo ( ) together with the socket: name string , sockfs_ops and
* SOCKFS_MAGIC , and returns that call ' s result unchanged .
* The flags , dev_name and data arguments are not used here .
*/
static int sockfs_get_sb ( struct file_system_type * fs_type ,
2006-09-01 11:19:31 +04:00
int flags , const char * dev_name , void * data ,
struct vfsmount * mnt )
2005-04-17 02:20:36 +04:00
{
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 13:02:57 +04:00
return get_sb_pseudo ( fs_type , " socket: " , & sockfs_ops , SOCKFS_MAGIC ,
mnt ) ;
2005-04-17 02:20:36 +04:00
}
2005-08-26 23:05:31 +04:00
/*
* vfsmount whose superblock root is the parent of every socket dentry
* and which is referenced ( mntget ) for each socket file .
* NOTE ( review ) : presumably the kernel - internal mount of sock_fs_type ;
* the assignment is not visible here - confirm .
*/
static struct vfsmount * sock_mnt __read_mostly ;
2005-04-17 02:20:36 +04:00
/*
* Filesystem type descriptor for sockfs : superblocks are obtained through
* sockfs_get_sb ( ) and released with kill_anon_super ( ) .
*/
static struct file_system_type sock_fs_type = {
. name = " sockfs " ,
. get_sb = sockfs_get_sb ,
. kill_sb = kill_anon_super ,
} ;
2006-09-01 11:19:31 +04:00
2007-05-08 11:26:18 +04:00
/*
* sockfs_dname ( ) is called from d_path ( ) .
*
* Builds the dentry ' s name in the caller - supplied buffer ( buflen bytes )
* by handing the socket:[%lu] format and the inode number
* ( dentry - > d_inode - > i_ino ) to dynamic_dname ( ) , whose return value is
* passed straight back .
*/
static char * sockfs_dname ( struct dentry * dentry , char * buffer , int buflen )
{
return dynamic_dname ( dentry , buffer , buflen , " socket:[%lu] " ,
dentry - > d_inode - > i_ino ) ;
}
2009-02-20 09:02:22 +03:00
/*
* Dentry operations set on socket dentries ; the only hook provided is
* d_dname , which generates the name via sockfs_dname ( ) .
*/
static const struct dentry_operations sockfs_dentry_operations = {
2007-05-08 11:26:18 +04:00
. d_dname = sockfs_dname ,
2005-04-17 02:20:36 +04:00
} ;
/*
* Obtains the first available file descriptor and sets it up for use .
*
2006-03-21 04:13:49 +03:00
* These functions create file structures and map them to fd space
* of the current process . On success it returns file descriptor
2005-04-17 02:20:36 +04:00
* and file struct implicitly stored in sock - > file .
* Note that another thread may close file descriptor before we return
* from this function . We use the fact that now we do not refer
* to socket after mapping . If one day we will need it , this
* function will increment ref . count on file by 1.
*
* In any case returned fd MAY BE not valid !
* This race condition is unavoidable
* with shared fd spaces , we cannot solve it inside kernel ,
* but we take care of internal coherence yet .
*/
2009-08-05 19:59:08 +04:00
static int sock_alloc_file ( struct socket * sock , struct file * * f , int flags )
2005-04-17 02:20:36 +04:00
{
2009-08-05 19:59:08 +04:00
struct qstr name = { . name = " " } ;
2009-08-09 00:52:35 +04:00
struct path path ;
2009-08-05 19:59:08 +04:00
struct file * file ;
2005-04-17 02:20:36 +04:00
int fd ;
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:17 +04:00
fd = get_unused_fd_flags ( flags ) ;
2009-08-05 19:59:08 +04:00
if ( unlikely ( fd < 0 ) )
return fd ;
2005-04-17 02:20:36 +04:00
2009-08-09 00:52:35 +04:00
path . dentry = d_alloc ( sock_mnt - > mnt_sb - > s_root , & name ) ;
if ( unlikely ( ! path . dentry ) ) {
2009-08-05 19:59:08 +04:00
put_unused_fd ( fd ) ;
2006-03-21 04:13:49 +03:00
return - ENOMEM ;
2009-08-05 19:59:08 +04:00
}
2009-08-09 00:52:35 +04:00
path . mnt = mntget ( sock_mnt ) ;
2006-03-21 04:13:49 +03:00
2009-08-09 00:52:35 +04:00
path . dentry - > d_op = & sockfs_dentry_operations ;
d_instantiate ( path . dentry , SOCK_INODE ( sock ) ) ;
2009-08-06 09:43:59 +04:00
SOCK_INODE ( sock ) - > i_fop = & socket_file_ops ;
2006-03-21 04:13:49 +03:00
2009-08-09 00:52:35 +04:00
file = alloc_file ( & path , FMODE_READ | FMODE_WRITE ,
2007-10-17 10:31:13 +04:00
& socket_file_ops ) ;
2009-08-06 09:43:59 +04:00
if ( unlikely ( ! file ) ) {
/* drop dentry, keep inode */
atomic_inc ( & path . dentry - > d_inode - > i_count ) ;
2009-08-09 00:52:35 +04:00
path_put ( & path ) ;
2009-08-06 09:43:59 +04:00
put_unused_fd ( fd ) ;
return - ENFILE ;
}
sock - > file = file ;
flag parameters: NONBLOCK in socket and socketpair
This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and paccept. To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.
Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_NONBLOCK O_NONBLOCK
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
return NULL;
}
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("socket(0) set non-blocking mode");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
if (fd == -1)
{
puts ("socket(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
{
puts ("socketpair(SOCK_NONBLOCK) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, NULL) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if (fl & O_NONBLOCK)
{
puts ("paccept(0) set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
if (s2 < 0)
{
puts ("paccept(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if ((fl & O_NONBLOCK) == 0)
{
puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:35 +04:00
file - > f_flags = O_RDWR | ( flags & O_NONBLOCK ) ;
2006-03-21 04:13:49 +03:00
file - > f_pos = 0 ;
file - > private_data = sock ;
2005-04-17 02:20:36 +04:00
2009-08-05 19:59:08 +04:00
* f = file ;
return fd ;
2006-03-21 04:13:49 +03:00
}
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:17 +04:00
/*
 * Obtain a descriptor and file for @sock through sock_alloc_file() and,
 * when that succeeded, publish the file under the descriptor with
 * fd_install(). @flags is passed through to sock_alloc_file() unchanged.
 *
 * Returns the value sock_alloc_file() returned: the descriptor on success,
 * a negative errno otherwise (in which case nothing is installed).
 */
int sock_map_fd ( struct socket * sock , int flags )
2006-03-21 04:13:49 +03:00
{
struct file * newfile ;
2009-08-05 19:59:08 +04:00
int fd = sock_alloc_file ( sock , & newfile , flags ) ;
2006-03-21 04:13:49 +03:00
2009-08-05 19:59:08 +04:00
/* Only a valid descriptor may be installed. */
if ( likely ( fd > = 0 ) )
2006-03-21 04:13:49 +03:00
fd_install ( fd , newfile ) ;
2009-08-05 19:59:08 +04:00
2005-04-17 02:20:36 +04:00
return fd ;
}
2006-03-21 09:27:12 +03:00
/*
 * Map a struct file to its socket.
 *
 * A file is treated as a socket when its f_op is socket_file_ops; in that
 * case the socket stored in file->private_data is returned and *err is
 * left untouched. Otherwise *err is set to -ENOTSOCK and NULL is returned.
 */
static struct socket * sock_from_file ( struct file * file , int * err )
{
if ( file - > f_op = = & socket_file_ops )
return file - > private_data ; /* set in sock_map_fd */
2007-02-09 01:59:57 +03:00
* err = - ENOTSOCK ;
return NULL ;
2006-03-21 09:27:12 +03:00
}
2005-04-17 02:20:36 +04:00
/**
* sockfd_lookup - Go from a file number to its socket slot
* @ fd : file handle
* @ err : pointer to an error code return
*
* The file handle passed in is locked and the socket it is bound
* to is returned . If an error occurs the err pointer is overwritten
* with a negative errno code and NULL is returned . The function checks
* for both invalid handles and passing a handle which is not a socket .
*
* On a success the socket object pointer is returned .
*/
struct socket * sockfd_lookup ( int fd , int * err )
{
struct file * file ;
struct socket * sock ;
2006-09-01 11:19:31 +04:00
file = fget ( fd ) ;
if ( ! file ) {
2005-04-17 02:20:36 +04:00
/* fget() found no file for this descriptor. */
* err = - EBADF ;
return NULL ;
}
2006-09-01 11:19:31 +04:00
2006-03-21 09:27:12 +03:00
/* On failure sock_from_file() has already stored -ENOTSOCK in *err. */
sock = sock_from_file ( file , err ) ;
if ( ! sock )
2005-04-17 02:20:36 +04:00
/* Not a socket: give back the file obtained from fget(). On success it is kept. */
fput ( file ) ;
2006-03-21 09:27:12 +03:00
return sock ;
}
2005-04-17 02:20:36 +04:00
2006-03-21 09:27:12 +03:00
/*
 * Variant of sockfd_lookup() built on fget_light()/fput_light().
 *
 * @fd:          descriptor to look up
 * @err:         receives -EBADF when no file is found, or -ENOTSOCK (via
 *               sock_from_file()) when the file is not a socket
 * @fput_needed: filled in by fget_light(); on success the caller still
 *               holds the file, so this value is what a later
 *               fput_light() needs
 *
 * Returns the socket on success, NULL on failure.
 */
static struct socket * sockfd_lookup_light ( int fd , int * err , int * fput_needed )
{
struct file * file ;
struct socket * sock ;
2006-04-20 02:25:02 +04:00
/* Default error, used if fget_light() returns no file. */
* err = - EBADF ;
2006-03-21 09:27:12 +03:00
file = fget_light ( fd , fput_needed ) ;
if ( file ) {
sock = sock_from_file ( file , err ) ;
if ( sock )
return sock ;
/* Not a socket: undo the fget_light() above. */
fput_light ( file , * fput_needed ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-21 09:27:12 +03:00
return NULL ;
2005-04-17 02:20:36 +04:00
}
/**
* sock_alloc - allocate a socket
2006-09-01 11:19:31 +04:00
*
2005-04-17 02:20:36 +04:00
* Allocate a new inode and socket object . The two are bound together
* and initialised . The socket is then returned . If we are out of inodes
* NULL is returned .
*/
static struct socket * sock_alloc ( void )
{
2006-09-01 11:19:31 +04:00
struct inode * inode ;
struct socket * sock ;
2005-04-17 02:20:36 +04:00
/* The inode comes from the superblock of the sock_mnt mount. */
inode = new_inode ( sock_mnt - > mnt_sb ) ;
if ( ! inode )
return NULL ;
/* The socket is derived from the inode; no separate allocation happens here. */
sock = SOCKET_I ( inode ) ;
2009-09-15 13:39:20 +04:00
kmemcheck_annotate_bitfield ( sock , type ) ;
2006-09-01 11:19:31 +04:00
/* Socket inode: type S_IFSOCK with rwx for user, group and other. */
inode - > i_mode = S_IFSOCK | S_IRWXUGO ;
2008-11-14 02:39:10 +03:00
/* Owned by the filesystem uid/gid of the current task. */
inode - > i_uid = current_fsuid ( ) ;
inode - > i_gid = current_fsgid ( ) ;
2005-04-17 02:20:36 +04:00
socket: use percpu_add() while updating sockets_in_use
sock_alloc() currently uses following code to update sockets_in_use
get_cpu_var(sockets_in_use)++;
put_cpu_var(sockets_in_use);
This translates to :
c0436274: b8 01 00 00 00 mov $0x1,%eax
c0436279: e8 42 40 df ff call c022a2c0 <add_preempt_count>
c043627e: bb 20 4f 6a c0 mov $0xc06a4f20,%ebx
c0436283: e8 18 ca f0 ff call c0342ca0 <debug_smp_processor_id>
c0436288: 03 1c 85 60 4a 65 c0 add -0x3f9ab5a0(,%eax,4),%ebx
c043628f: ff 03 incl (%ebx)
c0436291: b8 01 00 00 00 mov $0x1,%eax
c0436296: e8 75 3f df ff call c022a210 <sub_preempt_count>
c043629b: 89 e0 mov %esp,%eax
c043629d: 25 00 e0 ff ff and $0xffffe000,%eax
c04362a2: f6 40 08 08 testb $0x8,0x8(%eax)
c04362a6: 75 07 jne c04362af <sock_alloc+0x7f>
c04362a8: 8d 46 d8 lea -0x28(%esi),%eax
c04362ab: 5b pop %ebx
c04362ac: 5e pop %esi
c04362ad: c9 leave
c04362ae: c3 ret
c04362af: e8 cc 5d 09 00 call c04cc080 <preempt_schedule>
c04362b4: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
c04362b8: eb ee jmp c04362a8 <sock_alloc+0x78>
While percpu_add(sockets_in_use, 1) translates to a single instruction :
c0436275: 64 83 05 20 5f 6a c0 addl $0x1,%fs:0xc06a5f20
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-04-05 03:41:09 +04:00
/* Account for the new socket; sock_release() performs the matching percpu_sub(). */
percpu_add ( sockets_in_use , 1 ) ;
2005-04-17 02:20:36 +04:00
return sock ;
}
/*
* In theory you can ' t get an open on this inode , but / proc provides
* a back door . Remember to keep it shut otherwise you ' ll let the
* creepy crawlies in .
*/
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
/*
 * open() handler installed in bad_sock_fops: neither argument is looked
 * at, every attempt is refused with -ENXIO.
 */
static int sock_no_open(struct inode *unused_inode, struct file *unused_file)
{
	return -ENXIO;
}
2006-03-28 13:56:42 +04:00
/*
 * File operations whose only handler is an open() that always fails
 * (sock_no_open() returns -ENXIO).
 */
const struct file_operations bad_sock_fops = {
2005-04-17 02:20:36 +04:00
. owner = THIS_MODULE ,
. open = sock_no_open ,
} ;
/**
* sock_release - close a socket
* @ sock : socket to close
*
* The socket is released from the protocol stack if it has a release
* callback , and the inode is then released if the socket is bound to
2006-09-01 11:19:31 +04:00
* an inode not a file .
2005-04-17 02:20:36 +04:00
*/
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
void sock_release ( struct socket * sock )
{
if ( sock - > ops ) {
/*
 * Remember the owning module before ops is cleared, so that its
 * reference can be dropped after the release callback has run.
 */
struct module * owner = sock - > ops - > owner ;
sock - > ops - > release ( sock ) ;
sock - > ops = NULL ;
module_put ( owner ) ;
}
/* A non-empty fasync list at this point is only reported, not cleaned up. */
if ( sock - > fasync_list )
printk ( KERN_ERR " sock_release: fasync list not empty! \n " ) ;
socket: use percpu_add() while updating sockets_in_use
sock_alloc() currently uses following code to update sockets_in_use
get_cpu_var(sockets_in_use)++;
put_cpu_var(sockets_in_use);
This translates to :
c0436274: b8 01 00 00 00 mov $0x1,%eax
c0436279: e8 42 40 df ff call c022a2c0 <add_preempt_count>
c043627e: bb 20 4f 6a c0 mov $0xc06a4f20,%ebx
c0436283: e8 18 ca f0 ff call c0342ca0 <debug_smp_processor_id>
c0436288: 03 1c 85 60 4a 65 c0 add -0x3f9ab5a0(,%eax,4),%ebx
c043628f: ff 03 incl (%ebx)
c0436291: b8 01 00 00 00 mov $0x1,%eax
c0436296: e8 75 3f df ff call c022a210 <sub_preempt_count>
c043629b: 89 e0 mov %esp,%eax
c043629d: 25 00 e0 ff ff and $0xffffe000,%eax
c04362a2: f6 40 08 08 testb $0x8,0x8(%eax)
c04362a6: 75 07 jne c04362af <sock_alloc+0x7f>
c04362a8: 8d 46 d8 lea -0x28(%esi),%eax
c04362ab: 5b pop %ebx
c04362ac: 5e pop %esi
c04362ad: c9 leave
c04362ae: c3 ret
c04362af: e8 cc 5d 09 00 call c04cc080 <preempt_schedule>
c04362b4: 8d 74 26 00 lea 0x0(%esi,%eiz,1),%esi
c04362b8: eb ee jmp c04362a8 <sock_alloc+0x78>
While percpu_add(sockets_in_use, 1) translates to a single instruction :
c0436275: 64 83 05 20 5f 6a c0 addl $0x1,%fs:0xc06a5f20
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-04-05 03:41:09 +04:00
/* Undo the accounting done by sock_alloc(). */
percpu_sub ( sockets_in_use , 1 ) ;
2005-04-17 02:20:36 +04:00
/* No file attached: the inode is dropped here directly. */
if ( ! sock - > file ) {
iput ( SOCK_INODE ( sock ) ) ;
return ;
}
2006-09-01 11:19:31 +04:00
/* A file is attached: only break the link; no iput() is done on this path. */
sock - > file = NULL ;
2005-04-17 02:20:36 +04:00
}
2009-02-12 08:03:38 +03:00
int sock_tx_timestamp ( struct msghdr * msg , struct sock * sk ,
union skb_shared_tx * shtx )
{
shtx - > flags = 0 ;
if ( sock_flag ( sk , SOCK_TIMESTAMPING_TX_HARDWARE ) )
shtx - > hardware = 1 ;
if ( sock_flag ( sk , SOCK_TIMESTAMPING_TX_SOFTWARE ) )
shtx - > software = 1 ;
return 0 ;
}
EXPORT_SYMBOL ( sock_tx_timestamp ) ;
2006-09-01 11:19:31 +04:00
/*
 * Common send path: record the request in the sock_iocb attached to
 * @iocb, run the security_socket_sendmsg() hook and, if it allows the
 * operation, call the protocol's sendmsg handler.
 *
 * Returns the hook's non-zero result when it refuses, otherwise whatever
 * sock->ops->sendmsg() returns.
 */
static inline int __sock_sendmsg ( struct kiocb * iocb , struct socket * sock ,
2005-04-17 02:20:36 +04:00
struct msghdr * msg , size_t size )
{
struct sock_iocb * si = kiocb_to_siocb ( iocb ) ;
int err ;
si - > sock = sock ;
si - > scm = NULL ;
si - > msg = msg ;
si - > size = size ;
err = security_socket_sendmsg ( sock , msg , size ) ;
if ( err )
return err ;
return sock - > ops - > sendmsg ( iocb , sock , msg , size ) ;
}
/*
 * Synchronous sendmsg: runs __sock_sendmsg() on an on-stack kiocb and
 * sock_iocb, and waits for completion if the request was only queued.
 *
 * Returns the result of __sock_sendmsg(), or of wait_on_sync_kiocb()
 * when the former reported -EIOCBQUEUED.
 */
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
	struct sock_iocb siocb;
	struct kiocb iocb;
	int ret;

	init_sync_kiocb(&iocb, NULL);
	iocb.private = &siocb;

	ret = __sock_sendmsg(&iocb, sock, msg, size);
	if (ret != -EIOCBQUEUED)
		return ret;

	/* The request was queued: wait for it to complete. */
	ret = wait_on_sync_kiocb(&iocb);
	return ret;
}
/*
 * sock_sendmsg() wrapper taking a kvec array.
 *
 * @vec/@num are stored into msg->msg_iov/msg->msg_iovlen, then
 * sock_sendmsg() is called with the address limit switched to KERNEL_DS;
 * the previous limit is restored before returning. Returns the result of
 * sock_sendmsg().
 */
int kernel_sendmsg ( struct socket * sock , struct msghdr * msg ,
struct kvec * vec , size_t num , size_t size )
{
mm_segment_t oldfs = get_fs ( ) ;
int result ;
set_fs ( KERNEL_DS ) ;
/*
* The following cast is safe: as far as the compiler is concerned the
* definitions of kvec and iovec are identical, yielding the same
* in-core layout and alignment.
*/
2006-09-01 11:19:31 +04:00
msg - > msg_iov = ( struct iovec * ) vec ;
2005-04-17 02:20:36 +04:00
msg - > msg_iovlen = num ;
result = sock_sendmsg ( sock , msg , size ) ;
set_fs ( oldfs ) ;
return result ;
}
2009-02-12 08:03:38 +03:00
/*
 * Convert @kt into *@ts unless @kt is zero.
 *
 * Returns 1 when *@ts was written, 0 when @kt.tv64 is zero (in which
 * case *@ts is left untouched).
 */
static int ktime2ts(ktime_t kt, struct timespec *ts)
{
	if (!kt.tv64)
		return 0;

	*ts = ktime_to_timespec(kt);
	return 1;
}
2007-03-26 09:14:49 +04:00
/*
* called from sock_recv_timestamp ( ) if sock_flag ( sk , SOCK_RCVTSTAMP )
*
* Attaches timestamp control messages for @skb to @msg:
*  - with SOCK_RCVTSTAMP set, either SCM_TIMESTAMP (struct timeval) or,
*    when SOCK_RCVTSTAMPNS is also set, SCM_TIMESTAMPNS (struct timespec);
*  - SCM_TIMESTAMPING carrying three timespecs (ts[0] software stamp,
*    ts[1] shhwtstamps->syststamp, ts[2] shhwtstamps->hwtstamp), emitted
*    only if at least one slot was filled in.
*/
void __sock_recv_timestamp ( struct msghdr * msg , struct sock * sk ,
struct sk_buff * skb )
{
2009-02-12 08:03:38 +03:00
int need_software_tstamp = sock_flag ( sk , SOCK_RCVTSTAMP ) ;
struct timespec ts [ 3 ] ;
/* Stays 1 until one of the three SCM_TIMESTAMPING slots is filled. */
int empty = 1 ;
struct skb_shared_hwtstamps * shhwtstamps =
skb_hwtstamps ( skb ) ;
/* Race occurred between timestamp enabling and packet
receiving . Fill in the current time for now . */
if ( need_software_tstamp & & skb - > tstamp . tv64 = = 0 )
__net_timestamp ( skb ) ;
if ( need_software_tstamp ) {
if ( ! sock_flag ( sk , SOCK_RCVTSTAMPNS ) ) {
struct timeval tv ;
skb_get_timestamp ( skb , & tv ) ;
put_cmsg ( msg , SOL_SOCKET , SCM_TIMESTAMP ,
sizeof ( tv ) , & tv ) ;
} else {
2010-04-06 09:39:52 +04:00
/* ts[0] is only scratch space here; it is zeroed again by the memset below. */
skb_get_timestampns ( skb , & ts [ 0 ] ) ;
2009-02-12 08:03:38 +03:00
put_cmsg ( msg , SOL_SOCKET , SCM_TIMESTAMPNS ,
2010-04-06 09:39:52 +04:00
sizeof ( ts [ 0 ] ) , & ts [ 0 ] ) ;
2009-02-12 08:03:38 +03:00
}
}
/* Start SCM_TIMESTAMPING from all-zero so unfilled slots are reported as 0. */
memset ( ts , 0 , sizeof ( ts ) ) ;
if ( skb - > tstamp . tv64 & &
sock_flag ( sk , SOCK_TIMESTAMPING_SOFTWARE ) ) {
skb_get_timestampns ( skb , ts + 0 ) ;
empty = 0 ;
}
if ( shhwtstamps ) {
/* ktime2ts() returns 0 for a zero stamp, which leaves the slot (and empty) unchanged. */
if ( sock_flag ( sk , SOCK_TIMESTAMPING_SYS_HARDWARE ) & &
ktime2ts ( shhwtstamps - > syststamp , ts + 1 ) )
empty = 0 ;
if ( sock_flag ( sk , SOCK_TIMESTAMPING_RAW_HARDWARE ) & &
ktime2ts ( shhwtstamps - > hwtstamp , ts + 2 ) )
empty = 0 ;
2007-03-26 09:14:49 +04:00
}
2009-02-12 08:03:38 +03:00
if ( ! empty )
put_cmsg ( msg , SOL_SOCKET ,
SCM_TIMESTAMPING , sizeof ( ts ) , & ts ) ;
2007-03-26 09:14:49 +04:00
}
2007-03-10 06:39:35 +03:00
EXPORT_SYMBOL_GPL ( __sock_recv_timestamp ) ;
net: Generalize socket rx gap / receive queue overflow cmsg
Create a new socket level option to report number of queue overflows
Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames. This value was
exported via a SOL_PACKET level cmsg. After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option. As such I've created this patch. It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames. It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count). Tested
successfully by me.
Notes:
1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.
2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me. This also saves us having
to code in a per-protocol opt in mechanism.
3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-10-13 00:26:31 +04:00
inline void sock_recv_drops ( struct msghdr * msg , struct sock * sk , struct sk_buff * skb )
{
if ( sock_flag ( sk , SOCK_RXQ_OVFL ) & & skb & & skb - > dropcount )
put_cmsg ( msg , SOL_SOCKET , SO_RXQ_OVFL ,
sizeof ( __u32 ) , & skb - > dropcount ) ;
}
/*
 * Convenience wrapper for receive paths: calls sock_recv_timestamp() and
 * then sock_recv_drops() with the same @msg, @sk and @skb.
 */
void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			    struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
2009-10-13 10:40:10 +04:00
/*
 * Receive path without the security hook: record the request in the
 * sock_iocb attached to @iocb and call the protocol's recvmsg handler.
 * Returns whatever sock->ops->recvmsg() returns.
 */
static inline int __sock_recvmsg_nosec ( struct kiocb * iocb , struct socket * sock ,
struct msghdr * msg , size_t size , int flags )
2005-04-17 02:20:36 +04:00
{
struct sock_iocb * si = kiocb_to_siocb ( iocb ) ;
si - > sock = sock ;
si - > scm = NULL ;
si - > msg = msg ;
si - > size = size ;
si - > flags = flags ;
return sock - > ops - > recvmsg ( iocb , sock , msg , size , flags ) ;
}
2009-10-13 10:40:10 +04:00
/*
 * Receive path with the security check: security_socket_recvmsg() is
 * consulted first and its non-zero result is returned as-is; otherwise
 * the request is forwarded to __sock_recvmsg_nosec().
 */
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
				 struct msghdr *msg, size_t size, int flags)
{
	int err = security_socket_recvmsg(sock, msg, size, flags);

	if (err)
		return err;

	return __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
}
2006-09-01 11:19:31 +04:00
/*
 * Synchronous recvmsg: runs __sock_recvmsg() (which includes the security
 * hook) on an on-stack kiocb/sock_iocb pair. If the request was only
 * queued (-EIOCBQUEUED) the result of wait_on_sync_kiocb() is returned
 * instead.
 */
int sock_recvmsg ( struct socket * sock , struct msghdr * msg ,
2005-04-17 02:20:36 +04:00
size_t size , int flags )
{
struct kiocb iocb ;
struct sock_iocb siocb ;
int ret ;
2006-09-01 11:19:31 +04:00
init_sync_kiocb ( & iocb , NULL ) ;
2005-04-17 02:20:36 +04:00
iocb . private = & siocb ;
ret = __sock_recvmsg ( & iocb , sock , msg , size , flags ) ;
if ( - EIOCBQUEUED = = ret )
ret = wait_on_sync_kiocb ( & iocb ) ;
return ret ;
}
2009-10-13 10:40:10 +04:00
/*
 * Same as sock_recvmsg() but goes through __sock_recvmsg_nosec(), i.e.
 * the security_socket_recvmsg() hook is not invoked.
 *
 * Returns the result of __sock_recvmsg_nosec(), or of
 * wait_on_sync_kiocb() when the former reported -EIOCBQUEUED.
 */
static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct sock_iocb siocb;
	struct kiocb iocb;
	int ret;

	init_sync_kiocb(&iocb, NULL);
	iocb.private = &siocb;

	ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
	if (ret != -EIOCBQUEUED)
		return ret;

	/* The request was queued: wait for it to complete. */
	ret = wait_on_sync_kiocb(&iocb);
	return ret;
}
2006-09-01 11:19:31 +04:00
/*
 * sock_recvmsg() wrapper taking a kvec array.
 *
 * @vec/@num are stored into msg->msg_iov/msg->msg_iovlen, then
 * sock_recvmsg() is called with the address limit switched to KERNEL_DS;
 * the previous limit is restored before returning. Returns the result of
 * sock_recvmsg().
 */
int kernel_recvmsg ( struct socket * sock , struct msghdr * msg ,
struct kvec * vec , size_t num , size_t size , int flags )
2005-04-17 02:20:36 +04:00
{
mm_segment_t oldfs = get_fs ( ) ;
int result ;
set_fs ( KERNEL_DS ) ;
/*
* The following cast is safe: as far as the compiler is concerned the
* definitions of kvec and iovec are identical, yielding the same
* in-core layout and alignment.
*/
2006-09-01 11:19:31 +04:00
msg - > msg_iov = ( struct iovec * ) vec , msg - > msg_iovlen = num ;
2005-04-17 02:20:36 +04:00
result = sock_recvmsg ( sock , msg , size , flags ) ;
set_fs ( oldfs ) ;
return result ;
}
static void sock_aio_dtor ( struct kiocb * iocb )
{
kfree ( iocb - > private ) ;
}
2005-12-23 08:08:46 +03:00
/*
 * sendpage handler for socket files: forwards the page to
 * kernel_sendpage() on the socket stored in file->private_data.
 *
 * MSG_DONTWAIT is set when the file is O_NONBLOCK, MSG_MORE when @more is
 * non-zero. @ppos is not used.
 */
static ssize_t sock_sendpage ( struct file * file , struct page * page ,
int offset , size_t size , loff_t * ppos , int more )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
int flags ;
2005-12-23 08:08:46 +03:00
sock = file - > private_data ;
flags = ! ( file - > f_flags & O_NONBLOCK ) ? 0 : MSG_DONTWAIT ;
if ( more )
flags | = MSG_MORE ;
2009-08-13 19:28:36 +04:00
return kernel_sendpage ( sock , page , offset , size , flags ) ;
2005-12-23 08:08:46 +03:00
}
2005-04-17 02:20:36 +04:00
2007-11-07 10:30:13 +03:00
/*
 * splice_read handler for socket files: delegates to the protocol's
 * splice_read operation, or returns -EINVAL when the protocol does not
 * provide one.
 */
static ssize_t sock_splice_read ( struct file * file , loff_t * ppos ,
struct pipe_inode_info * pipe , size_t len ,
unsigned int flags )
{
struct socket * sock = file - > private_data ;
2008-02-15 13:35:45 +03:00
if ( unlikely ( ! sock - > ops - > splice_read ) )
return - EINVAL ;
2007-11-07 10:30:13 +03:00
return sock - > ops - > splice_read ( sock , ppos , pipe , len , flags ) ;
}
2005-12-23 08:08:46 +03:00
/*
 * Pick the sock_iocb to use for @iocb and link the two together.
 *
 * For a synchronous kiocb the caller-supplied @siocb is used as is. For
 * any other kiocb a sock_iocb is kmalloc'ed instead and sock_aio_dtor()
 * is installed as the kiocb destructor so that it gets freed.
 *
 * Returns the sock_iocb in use, or NULL if the allocation failed.
 */
static struct sock_iocb * alloc_sock_iocb ( struct kiocb * iocb ,
2006-09-01 11:19:31 +04:00
struct sock_iocb * siocb )
2005-12-23 08:08:46 +03:00
{
if ( ! is_sync_kiocb ( iocb ) ) {
siocb = kmalloc ( sizeof ( * siocb ) , GFP_KERNEL ) ;
if ( ! siocb )
return NULL ;
2005-04-17 02:20:36 +04:00
iocb - > ki_dtor = sock_aio_dtor ;
}
2005-12-23 08:08:46 +03:00
siocb - > kiocb = iocb ;
iocb - > private = siocb ;
return siocb ;
2005-04-17 02:20:36 +04:00
}
2005-12-23 08:08:46 +03:00
/*
 * Build @msg from an iovec array and receive into it.
 *
 * The request size is the sum of all iov_len values. @msg carries no
 * name and no control data; MSG_DONTWAIT is set when @file is
 * O_NONBLOCK. Returns the result of __sock_recvmsg().
 */
static ssize_t do_sock_read ( struct msghdr * msg , struct kiocb * iocb ,
2006-10-01 10:28:46 +04:00
struct file * file , const struct iovec * iov ,
unsigned long nr_segs )
2005-12-23 08:08:46 +03:00
{
struct socket * sock = file - > private_data ;
size_t size = 0 ;
/* NOTE(review): signed int index compared against unsigned long nr_segs. */
int i ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
for ( i = 0 ; i < nr_segs ; i + + )
size + = iov [ i ] . iov_len ;
2005-04-17 02:20:36 +04:00
2005-12-23 08:08:46 +03:00
msg - > msg_name = NULL ;
msg - > msg_namelen = 0 ;
msg - > msg_control = NULL ;
msg - > msg_controllen = 0 ;
2006-09-01 11:19:31 +04:00
/* The cast drops the const qualifier of @iov. */
msg - > msg_iov = ( struct iovec * ) iov ;
2005-12-23 08:08:46 +03:00
msg - > msg_iovlen = nr_segs ;
msg - > msg_flags = ( file - > f_flags & O_NONBLOCK ) ? MSG_DONTWAIT : 0 ;
return __sock_recvmsg ( iocb , sock , msg , size , msg - > msg_flags ) ;
}
2006-10-01 10:28:46 +04:00
/*
 * aio_read handler for socket files.
 *
 * Returns -ESPIPE for a non-zero @pos, 0 when iocb->ki_left is zero,
 * -ENOMEM when no sock_iocb could be obtained, otherwise the result of
 * do_sock_read() on the file attached to @iocb.
 */
static ssize_t sock_aio_read ( struct kiocb * iocb , const struct iovec * iov ,
unsigned long nr_segs , loff_t pos )
2005-12-23 08:08:46 +03:00
{
struct sock_iocb siocb , * x ;
2005-04-17 02:20:36 +04:00
if ( pos ! = 0 )
return - ESPIPE ;
2006-10-01 10:28:46 +04:00
if ( iocb - > ki_left = = 0 ) /* Match SYS5 behaviour */
2005-04-17 02:20:36 +04:00
return 0 ;
2006-10-01 10:28:46 +04:00
/* x is &siocb for a synchronous kiocb, a kmalloc'ed sock_iocb otherwise. */
x = alloc_sock_iocb ( iocb , & siocb ) ;
2005-12-23 08:08:46 +03:00
if ( ! x )
return - ENOMEM ;
2006-10-01 10:28:46 +04:00
return do_sock_read ( & x - > async_msg , iocb , iocb - > ki_filp , iov , nr_segs ) ;
2005-04-17 02:20:36 +04:00
}
2005-12-23 08:08:46 +03:00
/*
 * Build @msg from an iovec array and send it.
 *
 * The request size is the sum of all iov_len values. @msg carries no
 * name and no control data; MSG_DONTWAIT is set when @file is
 * O_NONBLOCK, and MSG_EOR is added for SOCK_SEQPACKET sockets. Returns
 * the result of __sock_sendmsg().
 */
static ssize_t do_sock_write ( struct msghdr * msg , struct kiocb * iocb ,
2006-10-01 10:28:46 +04:00
struct file * file , const struct iovec * iov ,
unsigned long nr_segs )
2005-04-17 02:20:36 +04:00
{
2005-12-23 08:08:46 +03:00
struct socket * sock = file - > private_data ;
size_t size = 0 ;
/* NOTE(review): signed int index compared against unsigned long nr_segs. */
int i ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
for ( i = 0 ; i < nr_segs ; i + + )
size + = iov [ i ] . iov_len ;
2005-04-17 02:20:36 +04:00
2005-12-23 08:08:46 +03:00
msg - > msg_name = NULL ;
msg - > msg_namelen = 0 ;
msg - > msg_control = NULL ;
msg - > msg_controllen = 0 ;
2006-09-01 11:19:31 +04:00
/* The cast drops the const qualifier of @iov. */
msg - > msg_iov = ( struct iovec * ) iov ;
2005-12-23 08:08:46 +03:00
msg - > msg_iovlen = nr_segs ;
msg - > msg_flags = ( file - > f_flags & O_NONBLOCK ) ? MSG_DONTWAIT : 0 ;
if ( sock - > type = = SOCK_SEQPACKET )
msg - > msg_flags | = MSG_EOR ;
2005-04-17 02:20:36 +04:00
2005-12-23 08:08:46 +03:00
return __sock_sendmsg ( iocb , sock , msg , size ) ;
2005-04-17 02:20:36 +04:00
}
2006-10-01 10:28:46 +04:00
/*
 * aio_write handler for socket files.
 *
 * Returns -ESPIPE for a non-zero @pos, -ENOMEM when no sock_iocb could
 * be obtained, otherwise the result of do_sock_write() on the file
 * attached to @iocb. Unlike sock_aio_read() there is no early return for
 * iocb->ki_left == 0.
 */
static ssize_t sock_aio_write ( struct kiocb * iocb , const struct iovec * iov ,
unsigned long nr_segs , loff_t pos )
2005-12-23 08:08:46 +03:00
{
struct sock_iocb siocb , * x ;
2005-04-17 02:20:36 +04:00
2005-12-23 08:08:46 +03:00
if ( pos ! = 0 )
return - ESPIPE ;
2006-10-01 10:28:46 +04:00
/* x is &siocb for a synchronous kiocb, a kmalloc'ed sock_iocb otherwise. */
x = alloc_sock_iocb ( iocb , & siocb ) ;
2005-12-23 08:08:46 +03:00
if ( ! x )
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
2006-10-01 10:28:46 +04:00
return do_sock_write ( & x - > async_msg , iocb , iocb - > ki_filp , iov , nr_segs ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Atomic setting of ioctl hooks to avoid race
* with module unload .
*/
2006-03-21 09:33:17 +03:00
/* Serialises updates of br_ioctl_hook against its use in sock_ioctl(). */
static DEFINE_MUTEX ( br_ioctl_mutex ) ;
2007-09-17 22:56:21 +04:00
/* Handler for the bridge ioctls dispatched by sock_ioctl(); NULL when none is registered. */
static int ( * br_ioctl_hook ) ( struct net * , unsigned int cmd , void __user * arg ) = NULL ;
2005-04-17 02:20:36 +04:00
2007-09-17 22:56:21 +04:00
/* Install @hook as the bridge ioctl handler under br_ioctl_mutex. */
void brioctl_set ( int ( * hook ) ( struct net * , unsigned int , void __user * ) )
2005-04-17 02:20:36 +04:00
{
2006-03-21 09:33:17 +03:00
mutex_lock ( & br_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
br_ioctl_hook = hook ;
2006-03-21 09:33:17 +03:00
mutex_unlock ( & br_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
}
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
EXPORT_SYMBOL ( brioctl_set ) ;
2006-03-21 09:33:17 +03:00
/* Serialises updates of vlan_ioctl_hook against its use in sock_ioctl(). */
static DEFINE_MUTEX ( vlan_ioctl_mutex ) ;
2007-09-17 22:56:21 +04:00
/* Handler for the VLAN ioctls dispatched by sock_ioctl(); NULL when none is registered. */
static int ( * vlan_ioctl_hook ) ( struct net * , void __user * arg ) ;
2005-04-17 02:20:36 +04:00
2007-09-17 22:56:21 +04:00
/* Install @hook as the VLAN ioctl handler under vlan_ioctl_mutex. */
void vlan_ioctl_set ( int ( * hook ) ( struct net * , void __user * ) )
2005-04-17 02:20:36 +04:00
{
2006-03-21 09:33:17 +03:00
mutex_lock ( & vlan_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
vlan_ioctl_hook = hook ;
2006-03-21 09:33:17 +03:00
mutex_unlock ( & vlan_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
}
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
EXPORT_SYMBOL ( vlan_ioctl_set ) ;
2006-03-21 09:33:17 +03:00
/* Serialises updates of dlci_ioctl_hook against its use in sock_ioctl(). */
static DEFINE_MUTEX ( dlci_ioctl_mutex ) ;
2006-09-01 11:19:31 +04:00
/* Handler for the DLCI ioctls dispatched by sock_ioctl(); NULL when none is registered. */
static int ( * dlci_ioctl_hook ) ( unsigned int , void __user * ) ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
/* Install @hook as the DLCI ioctl handler under dlci_ioctl_mutex. */
void dlci_ioctl_set ( int ( * hook ) ( unsigned int , void __user * ) )
2005-04-17 02:20:36 +04:00
{
2006-03-21 09:33:17 +03:00
mutex_lock ( & dlci_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
dlci_ioctl_hook = hook ;
2006-03-21 09:33:17 +03:00
mutex_unlock ( & dlci_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
}
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
EXPORT_SYMBOL ( dlci_ioctl_set ) ;
2009-11-07 10:10:54 +03:00
static long sock_do_ioctl ( struct net * net , struct socket * sock ,
unsigned int cmd , unsigned long arg )
{
int err ;
void __user * argp = ( void __user * ) arg ;
err = sock - > ops - > ioctl ( sock , cmd , arg ) ;
/*
* If this ioctl is unknown try to hand it down
* to the NIC driver .
*/
if ( err = = - ENOIOCTLCMD )
err = dev_ioctl ( net , cmd , argp ) ;
return err ;
}
2005-04-17 02:20:36 +04:00
/*
* With an ioctl , arg may well be a user mode pointer , but we don ' t know
* what to do with it - that ' s up to the protocol still .
*
* Dispatch performed here:
*  - SIOCDEVPRIVATE .. SIOCDEVPRIVATE + 15, and (with CONFIG_WEXT_CORE)
*    SIOCIWFIRST .. SIOCIWLAST, go straight to dev_ioctl();
*  - FIOSETOWN/SIOCSPGRP and FIOGETOWN/SIOCGPGRP set/get the owner of
*    sock->file;
*  - bridge, VLAN and DLCI commands go to their registered hook, with
*    -ENOPKG returned when no hook is available;
*  - everything else goes to sock_do_ioctl().
*/
static long sock_ioctl ( struct file * file , unsigned cmd , unsigned long arg )
{
struct socket * sock ;
2007-09-17 22:56:21 +04:00
struct sock * sk ;
2005-04-17 02:20:36 +04:00
void __user * argp = ( void __user * ) arg ;
int pid , err ;
2007-09-17 22:56:21 +04:00
struct net * net ;
2005-04-17 02:20:36 +04:00
2005-09-07 01:42:45 +04:00
sock = file - > private_data ;
2007-09-17 22:56:21 +04:00
sk = sock - > sk ;
2008-03-25 20:26:21 +03:00
net = sock_net ( sk ) ;
2005-04-17 02:20:36 +04:00
if ( cmd > = SIOCDEVPRIVATE & & cmd < = ( SIOCDEVPRIVATE + 15 ) ) {
2007-09-17 22:56:21 +04:00
err = dev_ioctl ( net , cmd , argp ) ;
2005-04-17 02:20:36 +04:00
} else
2009-09-30 01:27:28 +04:00
# ifdef CONFIG_WEXT_CORE
2005-04-17 02:20:36 +04:00
if ( cmd > = SIOCIWFIRST & & cmd < = SIOCIWLAST ) {
2007-09-17 22:56:21 +04:00
err = dev_ioctl ( net , cmd , argp ) ;
2005-04-17 02:20:36 +04:00
} else
2009-09-30 01:27:28 +04:00
# endif
2006-09-01 11:19:31 +04:00
switch ( cmd ) {
2005-04-17 02:20:36 +04:00
case FIOSETOWN :
case SIOCSPGRP :
/* -EFAULT is the result if the pid cannot be read from user space. */
err = - EFAULT ;
if ( get_user ( pid , ( int __user * ) argp ) )
break ;
err = f_setown ( sock - > file , pid , 1 ) ;
break ;
case FIOGETOWN :
case SIOCGPGRP :
2006-10-02 13:17:15 +04:00
err = put_user ( f_getown ( sock - > file ) ,
2006-09-01 11:19:31 +04:00
( int __user * ) argp ) ;
2005-04-17 02:20:36 +04:00
break ;
case SIOCGIFBR :
case SIOCSIFBR :
case SIOCBRADDBR :
case SIOCBRDELBR :
/* -ENOPKG stays in err if no hook is registered even after request_module(). */
err = - ENOPKG ;
if ( ! br_ioctl_hook )
request_module ( " bridge " ) ;
2006-03-21 09:33:17 +03:00
/* The hook is re-checked and called with the mutex held (see brioctl_set()). */
mutex_lock ( & br_ioctl_mutex ) ;
2006-09-01 11:19:31 +04:00
if ( br_ioctl_hook )
2007-09-17 22:56:21 +04:00
err = br_ioctl_hook ( net , cmd , argp ) ;
2006-03-21 09:33:17 +03:00
mutex_unlock ( & br_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
break ;
case SIOCGIFVLAN :
case SIOCSIFVLAN :
/* Same pattern as the bridge case, using the VLAN hook and mutex. */
err = - ENOPKG ;
if ( ! vlan_ioctl_hook )
request_module ( " 8021q " ) ;
2006-03-21 09:33:17 +03:00
mutex_lock ( & vlan_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
if ( vlan_ioctl_hook )
2007-09-17 22:56:21 +04:00
err = vlan_ioctl_hook ( net , argp ) ;
2006-03-21 09:33:17 +03:00
mutex_unlock ( & vlan_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
break ;
case SIOCADDDLCI :
case SIOCDELDLCI :
/* Same pattern as the bridge case, using the DLCI hook and mutex. */
err = - ENOPKG ;
if ( ! dlci_ioctl_hook )
request_module ( " dlci " ) ;
2008-03-22 01:58:52 +03:00
mutex_lock ( & dlci_ioctl_mutex ) ;
if ( dlci_ioctl_hook )
2005-04-17 02:20:36 +04:00
err = dlci_ioctl_hook ( cmd , argp ) ;
2008-03-22 01:58:52 +03:00
mutex_unlock ( & dlci_ioctl_mutex ) ;
2005-04-17 02:20:36 +04:00
break ;
default :
2009-11-07 10:10:54 +03:00
err = sock_do_ioctl ( net , sock , cmd , arg ) ;
2005-04-17 02:20:36 +04:00
break ;
2006-09-01 11:19:31 +04:00
}
2005-04-17 02:20:36 +04:00
return err ;
}
/*
 * Allocate a bare socket of the given @type without attaching any
 * protocol operations (only sock->type is set here).
 *
 * security_socket_create() is consulted before the allocation and
 * security_socket_post_create() after it; if the latter fails the socket
 * is released again.
 *
 * Returns 0 with *@res pointing at the new socket, or a negative errno
 * (-ENOMEM if sock_alloc() fails) with *@res set to NULL.
 */
int sock_create_lite ( int family , int type , int protocol , struct socket * * res )
{
int err ;
struct socket * sock = NULL ;
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
err = security_socket_create ( family , type , protocol , 1 ) ;
if ( err )
goto out ;
sock = sock_alloc ( ) ;
if ( ! sock ) {
err = - ENOMEM ;
goto out ;
}
sock - > type = type ;
2006-08-05 10:17:57 +04:00
err = security_socket_post_create ( sock , family , type , protocol , 1 ) ;
if ( err )
goto out_release ;
2005-04-17 02:20:36 +04:00
out :
/* *res is written on every path; it is NULL whenever err is non-zero. */
* res = sock ;
return err ;
2006-08-05 10:17:57 +04:00
out_release :
sock_release ( sock ) ;
sock = NULL ;
goto out ;
2005-04-17 02:20:36 +04:00
}
/*
 * sock_poll - poll file operation for sockets.
 *
 * Fetches the socket stored in file->private_data and forwards the request
 * to that socket's ops->poll handler, returning the handler's result
 * unchanged.
 *
 * No kernel lock held - perfect
 */
2006-09-01 11:19:31 +04:00
static unsigned int sock_poll ( struct file * file , poll_table * wait )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
/*
2006-09-01 11:19:31 +04:00
* We can't return errors to poll, so it's either yes or no.
2005-04-17 02:20:36 +04:00
*/
2005-09-07 01:42:45 +04:00
sock = file - > private_data ;
2005-04-17 02:20:36 +04:00
return sock - > ops - > poll ( file , sock , wait ) ;
}
2006-09-01 11:19:31 +04:00
/*
 * sock_mmap - mmap file operation for sockets.
 *
 * Takes the socket from file->private_data and delegates to its ops->mmap
 * handler; the handler's return value is passed back as-is.
 */
static int sock_mmap ( struct file * file , struct vm_area_struct * vma )
2005-04-17 02:20:36 +04:00
{
2005-09-07 01:42:45 +04:00
struct socket * sock = file - > private_data ;
2005-04-17 02:20:36 +04:00
return sock - > ops - > mmap ( file , sock , vma ) ;
}
2005-08-16 09:18:02 +04:00
/*
 * sock_close - release file operation for sockets.
 *
 * Releases the socket embedded in @inode via sock_release(SOCKET_I(inode)).
 * @filp is not used. Always returns 0, including when @inode is NULL (in
 * which case only a debug message is printed).
 */
static int sock_close ( struct inode * inode , struct file * filp )
2005-04-17 02:20:36 +04:00
{
/*
2006-09-01 11:19:31 +04:00
* It was possible for the inode to be NULL when we were
* closing an unfinished socket.
2005-04-17 02:20:36 +04:00
*/
2006-09-01 11:19:31 +04:00
if ( ! inode ) {
2005-04-17 02:20:36 +04:00
printk ( KERN_DEBUG " sock_close: NULL inode \n " ) ;
return 0 ;
}
sock_release ( SOCKET_I ( inode ) ) ;
return 0 ;
}
/*
 * Update the socket async list
 *
 * sock_fasync() adds (on != 0) or removes (on == 0) the fasync entry that
 * belongs to @filp on the socket's fasync_list, and mirrors the request in
 * the FASYNC bit of filp->f_flags. When an entry for @filp already exists
 * and on != 0, only its fa_fd is refreshed with @fd.
 *
 * Returns -ENOMEM if the new entry cannot be allocated, -EINVAL if the
 * socket has no sk attached, and 0 otherwise (including a removal request
 * for a file that has no entry).
 *
 * Fasync_list locking strategy.
 *
 * 1. fasync_list is modified only under process context socket lock
 *    i.e. under semaphore.
 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
 *    or under socket lock.
 * 3. fasync_list can be used from softirq context, so that
 *    modification under socket lock have to be enhanced with
 *    write_lock_bh(&sk->sk_callback_lock).
 *							--ANK (990710)
 */
static int sock_fasync ( int fd , struct file * filp , int on )
{
2006-09-01 11:19:31 +04:00
struct fasync_struct * fa , * fna = NULL , * * prev ;
2005-04-17 02:20:36 +04:00
struct socket * sock ;
struct sock * sk ;
2006-09-01 11:19:31 +04:00
/* Allocate the new entry up front, before any lock is taken. */
if ( on ) {
2006-01-12 02:56:43 +03:00
fna = kmalloc ( sizeof ( struct fasync_struct ) , GFP_KERNEL ) ;
2006-09-01 11:19:31 +04:00
if ( fna = = NULL )
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
}
2005-09-07 01:42:45 +04:00
sock = filp - > private_data ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
sk = sock - > sk ;
if ( sk = = NULL ) {
2005-04-17 02:20:36 +04:00
/* fna is still NULL here when on == 0. */
kfree ( fna ) ;
return - EINVAL ;
}
lock_sock ( sk ) ;
2009-02-02 00:26:59 +03:00
/* Keep the file's FASYNC flag in step with the request, under f_lock. */
spin_lock ( & filp - > f_lock ) ;
if ( on )
filp - > f_flags | = FASYNC ;
else
filp - > f_flags & = ~ FASYNC ;
spin_unlock ( & filp - > f_lock ) ;
2006-09-01 11:19:31 +04:00
/*
 * Search the list for an entry owned by filp. prev tracks the link that
 * points at fa so the entry can be unlinked below; fa ends up NULL when
 * no entry matches.
 */
prev = & ( sock - > fasync_list ) ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
for ( fa = * prev ; fa ! = NULL ; prev = & fa - > fa_next , fa = * prev )
if ( fa - > fa_file = = filp )
2005-04-17 02:20:36 +04:00
break ;
2006-09-01 11:19:31 +04:00
if ( on ) {
if ( fa ! = NULL ) {
2005-04-17 02:20:36 +04:00
/* Already registered: refresh the fd and discard the spare entry. */
write_lock_bh ( & sk - > sk_callback_lock ) ;
2006-09-01 11:19:31 +04:00
fa - > fa_fd = fd ;
2005-04-17 02:20:36 +04:00
write_unlock_bh ( & sk - > sk_callback_lock ) ;
kfree ( fna ) ;
goto out ;
}
2006-09-01 11:19:31 +04:00
/* Fill in the new entry, then publish it at the head of the list. */
fna - > fa_file = filp ;
fna - > fa_fd = fd ;
fna - > magic = FASYNC_MAGIC ;
fna - > fa_next = sock - > fasync_list ;
2005-04-17 02:20:36 +04:00
write_lock_bh ( & sk - > sk_callback_lock ) ;
2006-09-01 11:19:31 +04:00
sock - > fasync_list = fna ;
net: speedup sk_wake_async()
An incoming datagram must bring into cpu cache *lot* of cache lines,
in particular : (other parts omitted (hash chains, ip route cache...))
On 32bit arches :
offsetof(struct sock, sk_rcvbuf) =0x30 (read)
offsetof(struct sock, sk_lock) =0x34 (rw)
offsetof(struct sock, sk_sleep) =0x50 (read)
offsetof(struct sock, sk_rmem_alloc) =0x64 (rw)
offsetof(struct sock, sk_receive_queue)=0x74 (rw)
offsetof(struct sock, sk_forward_alloc)=0x98 (rw)
offsetof(struct sock, sk_callback_lock)=0xcc (rw)
offsetof(struct sock, sk_drops) =0xd8 (read if we add dropcount support, rw if frame dropped)
offsetof(struct sock, sk_filter) =0xf8 (read)
offsetof(struct sock, sk_socket) =0x138 (read)
offsetof(struct sock, sk_data_ready) =0x15c (read)
We can avoid sk->sk_socket and socket->fasync_list referencing on sockets
with no fasync() structures. (socket->fasync_list ptr is probably already in cache
because it shares a cache line with socket->wait, ie location pointed by sk->sk_sleep)
This avoids one cache line load per incoming packet for common cases (no fasync())
We can leave (or even move in a future patch) sk->sk_socket in a cold location
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-10-07 04:28:29 +04:00
sock_set_flag ( sk , SOCK_FASYNC ) ;
2005-04-17 02:20:36 +04:00
write_unlock_bh ( & sk - > sk_callback_lock ) ;
2006-09-01 11:19:31 +04:00
} else {
if ( fa ! = NULL ) {
2005-04-17 02:20:36 +04:00
/* Unlink the entry; clear SOCK_FASYNC once the list is empty. */
write_lock_bh ( & sk - > sk_callback_lock ) ;
2006-09-01 11:19:31 +04:00
* prev = fa - > fa_next ;
net: speedup sk_wake_async()
An incoming datagram must bring into cpu cache *lot* of cache lines,
in particular : (other parts omitted (hash chains, ip route cache...))
On 32bit arches :
offsetof(struct sock, sk_rcvbuf) =0x30 (read)
offsetof(struct sock, sk_lock) =0x34 (rw)
offsetof(struct sock, sk_sleep) =0x50 (read)
offsetof(struct sock, sk_rmem_alloc) =0x64 (rw)
offsetof(struct sock, sk_receive_queue)=0x74 (rw)
offsetof(struct sock, sk_forward_alloc)=0x98 (rw)
offsetof(struct sock, sk_callback_lock)=0xcc (rw)
offsetof(struct sock, sk_drops) =0xd8 (read if we add dropcount support, rw if frame dropped)
offsetof(struct sock, sk_filter) =0xf8 (read)
offsetof(struct sock, sk_socket) =0x138 (read)
offsetof(struct sock, sk_data_ready) =0x15c (read)
We can avoid sk->sk_socket and socket->fasync_list referencing on sockets
with no fasync() structures. (socket->fasync_list ptr is probably already in cache
because it shares a cache line with socket->wait, ie location pointed by sk->sk_sleep)
This avoids one cache line load per incoming packet for common cases (no fasync())
We can leave (or even move in a future patch) sk->sk_socket in a cold location
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-10-07 04:28:29 +04:00
if ( ! sock - > fasync_list )
sock_reset_flag ( sk , SOCK_FASYNC ) ;
2005-04-17 02:20:36 +04:00
write_unlock_bh ( & sk - > sk_callback_lock ) ;
kfree ( fa ) ;
}
}
out :
release_sock ( sock - > sk ) ;
return 0 ;
}
/*
 * sock_wake_async - signal the entries on a socket's fasync_list.
 *
 * @how selects the event:
 *   SOCK_WAKE_WAITD - send SIGIO unless SOCK_ASYNC_WAITDATA is set in
 *                     sock->flags;
 *   SOCK_WAKE_SPACE - send SIGIO only if SOCK_ASYNC_NOSPACE was set; the bit
 *                     is cleared by the test;
 *   SOCK_WAKE_IO    - send SIGIO unconditionally;
 *   SOCK_WAKE_URG   - send SIGURG.
 * Any other @how value sends nothing.
 * @band is passed through to __kill_fasync().
 *
 * Returns -1 if @sock is NULL or has an empty fasync_list, 0 otherwise
 * (whether or not a signal was actually sent).
 *
 * This function may be called only under socket lock or callback_lock
 */
int sock_wake_async ( struct socket * sock , int how , int band )
{
if ( ! sock | | ! sock - > fasync_list )
return - 1 ;
2006-09-01 11:19:31 +04:00
switch ( how ) {
2007-11-26 15:10:50 +03:00
case SOCK_WAKE_WAITD :
2005-04-17 02:20:36 +04:00
if ( test_bit ( SOCK_ASYNC_WAITDATA , & sock - > flags ) )
break ;
goto call_kill ;
2007-11-26 15:10:50 +03:00
case SOCK_WAKE_SPACE :
2005-04-17 02:20:36 +04:00
if ( ! test_and_clear_bit ( SOCK_ASYNC_NOSPACE , & sock - > flags ) )
break ;
/* fall through */
2007-11-26 15:10:50 +03:00
case SOCK_WAKE_IO :
2006-09-01 11:19:31 +04:00
call_kill :
2005-04-17 02:20:36 +04:00
__kill_fasync ( sock - > fasync_list , SIGIO , band ) ;
break ;
2007-11-26 15:10:50 +03:00
case SOCK_WAKE_URG :
2005-04-17 02:20:36 +04:00
__kill_fasync ( sock - > fasync_list , SIGURG , band ) ;
}
return 0 ;
}
2007-10-09 10:24:22 +04:00
/*
 * __sock_create - allocate a socket and let the protocol family set it up.
 *
 * @net:      namespace handed to the family's ->create hook.
 * @family:   protocol family; must satisfy 0 <= family < NPROTO.
 * @type:     socket type; must satisfy 0 <= type < SOCK_MAX.
 * @protocol: passed through to the security hooks and to ->create.
 * @res:      receives the new socket; written only on success.
 * @kern:     passed through to the security hooks and to ->create.
 *
 * Returns 0 on success. Failures visible in this function:
 *   -EAFNOSUPPORT  family out of range, no registered family, or a module
 *                  reference could not be taken;
 *   -EINVAL        type out of range;
 *   -ENFILE        sock_alloc() failed;
 *   otherwise the error from a security hook or a negative value from
 *   ->create.
 */
static int __sock_create ( struct net * net , int family , int type , int protocol ,
2006-09-01 11:19:31 +04:00
struct socket * * res , int kern )
2005-04-17 02:20:36 +04:00
{
int err ;
struct socket * sock ;
2006-09-01 11:23:39 +04:00
const struct net_proto_family * pf ;
2005-04-17 02:20:36 +04:00
/*
2006-09-01 11:19:31 +04:00
* Check protocol is in range
2005-04-17 02:20:36 +04:00
*/
if ( family < 0 | | family > = NPROTO )
return - EAFNOSUPPORT ;
if ( type < 0 | | type > = SOCK_MAX )
return - EINVAL ;
/* Compatibility.
This uglymoron is moved from INET layer to here to avoid
deadlock in module load.
*/
if ( family = = PF_INET & & type = = SOCK_PACKET ) {
2006-09-01 11:19:31 +04:00
/* Function-static flag: the warning below is printed at most once. */
static int warned ;
2005-04-17 02:20:36 +04:00
if ( ! warned ) {
warned = 1 ;
2006-09-01 11:19:31 +04:00
printk ( KERN_INFO " %s uses obsolete (PF_INET,SOCK_PACKET) \n " ,
current - > comm ) ;
2005-04-17 02:20:36 +04:00
}
family = PF_PACKET ;
}
err = security_socket_create ( family , type , protocol , kern ) ;
if ( err )
return err ;
2006-09-01 11:19:31 +04:00
2006-09-01 11:23:39 +04:00
/*
* Allocate the socket and allow the family to set things up. if
* the protocol is 0, the family is instructed to select an appropriate
* default.
*/
sock = sock_alloc ( ) ;
if ( ! sock ) {
if ( net_ratelimit ( ) )
printk ( KERN_WARNING " socket: no more sockets \n " ) ;
return - ENFILE ; /* Not exactly a match, but its the
closest posix thing */
}
sock - > type = type ;
2008-10-17 02:24:51 +04:00
# ifdef CONFIG_MODULES
2006-09-01 11:19:31 +04:00
/* Attempt to load a protocol module if the find failed.
*
* 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
2005-04-17 02:20:36 +04:00
* requested real, full-featured networking support upon configuration.
* Otherwise module support will break!
*/
2006-09-01 11:23:39 +04:00
if ( net_families [ family ] = = NULL )
2006-09-01 11:19:31 +04:00
request_module ( " net-pf-%d " , family ) ;
2005-04-17 02:20:36 +04:00
# endif
2006-09-01 11:23:39 +04:00
/* Look the family up under RCU; the lock is held until a module ref is taken. */
rcu_read_lock ( ) ;
pf = rcu_dereference ( net_families [ family ] ) ;
err = - EAFNOSUPPORT ;
if ( ! pf )
goto out_release ;
2005-04-17 02:20:36 +04:00
/*
* We will call the ->create function, that possibly is in a loadable
* module, so we have to bump that loadable module refcnt first.
*/
2006-09-01 11:23:39 +04:00
if ( ! try_module_get ( pf - > owner ) )
2005-04-17 02:20:36 +04:00
goto out_release ;
2006-09-01 11:23:39 +04:00
/* Now protected by module ref count */
rcu_read_unlock ( ) ;
2009-11-06 09:18:14 +03:00
err = pf - > create ( net , sock , protocol , kern ) ;
2006-09-01 11:23:39 +04:00
if ( err < 0 )
2005-04-17 02:20:36 +04:00
goto out_module_put ;
2005-09-28 02:23:38 +04:00
2005-04-17 02:20:36 +04:00
/*
* Now to bump the refcnt of the [loadable] module that owns this
* socket at sock_release time we decrement its refcnt.
*/
2006-09-01 11:23:39 +04:00
if ( ! try_module_get ( sock - > ops - > owner ) )
goto out_module_busy ;
2005-04-17 02:20:36 +04:00
/*
* Now that we're done with the ->create function, the [loadable]
* module can have its refcnt decremented
*/
2006-09-01 11:23:39 +04:00
module_put ( pf - > owner ) ;
2006-08-05 10:17:57 +04:00
err = security_socket_post_create ( sock , family , type , protocol , kern ) ;
if ( err )
2007-08-16 01:46:02 +04:00
goto out_sock_release ;
2006-09-01 11:23:39 +04:00
* res = sock ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:23:39 +04:00
return 0 ;
/* Error unwinding: each label falls through into the ones below it. */
out_module_busy :
err = - EAFNOSUPPORT ;
2005-04-17 02:20:36 +04:00
out_module_put :
2006-09-01 11:23:39 +04:00
/*
 * NOTE(review): ops is cleared before sock_release(), presumably so the
 * release path does not drop an ops-owner reference that was never taken
 * on these paths - confirm against sock_release().
 */
sock - > ops = NULL ;
module_put ( pf - > owner ) ;
out_sock_release :
2005-04-17 02:20:36 +04:00
sock_release ( sock ) ;
2006-09-01 11:23:39 +04:00
return err ;
out_release :
/* Failure while still inside the RCU read-side section. */
rcu_read_unlock ( ) ;
goto out_sock_release ;
2005-04-17 02:20:36 +04:00
}
/*
 * sock_create - create a socket in the calling task's network namespace.
 *
 * Thin wrapper around __sock_create() using current->nsproxy->net_ns as the
 * namespace and kern = 0. The result is stored in *res and the return value
 * is that of __sock_create().
 */
int sock_create ( int family , int type , int protocol , struct socket * * res )
{
2007-10-09 10:24:22 +04:00
return __sock_create ( current - > nsproxy - > net_ns , family , type , protocol , res , 0 ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * sock_create_kern - create a socket in the initial network namespace.
 *
 * Thin wrapper around __sock_create() using &init_net as the namespace and
 * kern = 1. The result is stored in *res and the return value is that of
 * __sock_create().
 */
int sock_create_kern ( int family , int type , int protocol , struct socket * * res )
{
2007-10-09 10:24:22 +04:00
return __sock_create ( & init_net , family , type , protocol , res , 1 ) ;
2005-04-17 02:20:36 +04:00
}
2009-01-14 16:14:24 +03:00
SYSCALL_DEFINE3 ( socket , int , family , int , type , int , protocol )
2005-04-17 02:20:36 +04:00
{
int retval ;
struct socket * sock ;
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:17 +04:00
int flags ;
2008-07-24 08:29:42 +04:00
/* Check the SOCK_* constants for consistency. */
BUILD_BUG_ON ( SOCK_CLOEXEC ! = O_CLOEXEC ) ;
BUILD_BUG_ON ( ( SOCK_MAX | SOCK_TYPE_MASK ) ! = SOCK_TYPE_MASK ) ;
BUILD_BUG_ON ( SOCK_CLOEXEC & SOCK_TYPE_MASK ) ;
BUILD_BUG_ON ( SOCK_NONBLOCK & SOCK_TYPE_MASK ) ;
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:17 +04:00
flags = type & ~ SOCK_TYPE_MASK ;
flag parameters: NONBLOCK in socket and socketpair
This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and paccept. To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.
Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_NONBLOCK O_NONBLOCK
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
return NULL;
}
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("socket(0) set non-blocking mode");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
if (fd == -1)
{
puts ("socket(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
{
puts ("socketpair(SOCK_NONBLOCK) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, NULL) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if (fl & O_NONBLOCK)
{
puts ("paccept(0) set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
if (s2 < 0)
{
puts ("paccept(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if ((fl & O_NONBLOCK) == 0)
{
puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:35 +04:00
if ( flags & ~ ( SOCK_CLOEXEC | SOCK_NONBLOCK ) )
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:17 +04:00
return - EINVAL ;
type & = SOCK_TYPE_MASK ;
2005-04-17 02:20:36 +04:00
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
static void
handler (int s)
{
}
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
if ( SOCK_NONBLOCK ! = O_NONBLOCK & & ( flags & SOCK_NONBLOCK ) )
flags = ( flags & ~ SOCK_NONBLOCK ) | O_NONBLOCK ;
2005-04-17 02:20:36 +04:00
retval = sock_create ( family , type , protocol , & sock ) ;
if ( retval < 0 )
goto out ;
flag parameters: NONBLOCK in socket and socketpair
This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and paccept. To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.
Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_NONBLOCK O_NONBLOCK
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
return NULL;
}
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("socket(0) set non-blocking mode");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
if (fd == -1)
{
puts ("socket(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
{
puts ("socketpair(SOCK_NONBLOCK) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, NULL) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if (fl & O_NONBLOCK)
{
puts ("paccept(0) set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
if (s2 < 0)
{
puts ("paccept(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if ((fl & O_NONBLOCK) == 0)
{
puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:35 +04:00
retval = sock_map_fd ( sock , flags & ( O_CLOEXEC | O_NONBLOCK ) ) ;
2005-04-17 02:20:36 +04:00
if ( retval < 0 )
goto out_release ;
out :
/* It may be already another descriptor 8) Not kernel problem. */
return retval ;
out_release :
sock_release ( sock ) ;
return retval ;
}
/*
* Create a pair of connected sockets .
*/
2009-01-14 16:14:24 +03:00
SYSCALL_DEFINE4 ( socketpair , int , family , int , type , int , protocol ,
int __user * , usockvec )
2005-04-17 02:20:36 +04:00
{
struct socket * sock1 , * sock2 ;
int fd1 , fd2 , err ;
2007-02-07 09:48:00 +03:00
struct file * newfile1 , * newfile2 ;
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:17 +04:00
int flags ;
flags = type & ~ SOCK_TYPE_MASK ;
flag parameters: NONBLOCK in socket and socketpair
This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and paccept. To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.
Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_NONBLOCK O_NONBLOCK
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
return NULL;
}
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("socket(0) set non-blocking mode");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
if (fd == -1)
{
puts ("socket(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
{
puts ("socketpair(SOCK_NONBLOCK) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, NULL) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if (fl & O_NONBLOCK)
{
puts ("paccept(0) set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
if (s2 < 0)
{
puts ("paccept(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if ((fl & O_NONBLOCK) == 0)
{
puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:35 +04:00
if ( flags & ~ ( SOCK_CLOEXEC | SOCK_NONBLOCK ) )
flag parameters: socket and socketpair
This patch adds support for flag values which are ORed to the type passed
to socket and socketpair. The additional code is minimal. The flag
values in this implementation can and must match the O_* flags. This
avoids overhead in the conversion.
The internal functions sock_alloc_fd and sock_map_fd get a new parameter
and all callers are changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#define PORT 57392
/* For Linux these must be the same. */
#define SOCK_CLOEXEC O_CLOEXEC
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
puts ("socket(0) set close-on-exec flag");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_CLOEXEC, 0);
if (fd == -1)
{
puts ("socket(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (fd, F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
puts ("socket(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if (coe & FD_CLOEXEC)
{
printf ("socketpair(0) set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0, fds) == -1)
{
puts ("socketpair(SOCK_CLOEXEC) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
coe = fcntl (fds[i], F_GETFD);
if (coe == -1)
{
puts ("fcntl failed");
return 1;
}
if ((coe & FD_CLOEXEC) == 0)
{
printf ("socketpair(SOCK_CLOEXEC) does not set close-on-exec flag for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:17 +04:00
return - EINVAL ;
type & = SOCK_TYPE_MASK ;
2005-04-17 02:20:36 +04:00
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
static void
handler (int s)
{
}
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
if ( SOCK_NONBLOCK ! = O_NONBLOCK & & ( flags & SOCK_NONBLOCK ) )
flags = ( flags & ~ SOCK_NONBLOCK ) | O_NONBLOCK ;
2005-04-17 02:20:36 +04:00
/*
* Obtain the first socket and check if the underlying protocol
* supports the socketpair call .
*/
err = sock_create ( family , type , protocol , & sock1 ) ;
if ( err < 0 )
goto out ;
err = sock_create ( family , type , protocol , & sock2 ) ;
if ( err < 0 )
goto out_release_1 ;
err = sock1 - > ops - > socketpair ( sock1 , sock2 ) ;
2006-09-01 11:19:31 +04:00
if ( err < 0 )
2005-04-17 02:20:36 +04:00
goto out_release_both ;
2009-08-05 19:59:08 +04:00
fd1 = sock_alloc_file ( sock1 , & newfile1 , flags ) ;
2007-10-30 07:54:02 +03:00
if ( unlikely ( fd1 < 0 ) ) {
err = fd1 ;
2007-02-07 09:48:00 +03:00
goto out_release_both ;
2007-10-30 07:54:02 +03:00
}
2005-04-17 02:20:36 +04:00
2009-08-05 19:59:08 +04:00
fd2 = sock_alloc_file ( sock2 , & newfile2 , flags ) ;
2009-08-05 19:29:23 +04:00
if ( unlikely ( fd2 < 0 ) ) {
err = fd2 ;
fput ( newfile1 ) ;
put_unused_fd ( fd1 ) ;
sock_release ( sock2 ) ;
goto out ;
2007-02-07 09:48:00 +03:00
}
2008-12-14 12:57:47 +03:00
audit_fd_pair ( fd1 , fd2 ) ;
2007-02-07 09:48:00 +03:00
fd_install ( fd1 , newfile1 ) ;
fd_install ( fd2 , newfile2 ) ;
2005-04-17 02:20:36 +04:00
/* fd1 and fd2 may already be other descriptors.
* Not kernel problem .
*/
2006-09-01 11:19:31 +04:00
err = put_user ( fd1 , & usockvec [ 0 ] ) ;
2005-04-17 02:20:36 +04:00
if ( ! err )
err = put_user ( fd2 , & usockvec [ 1 ] ) ;
if ( ! err )
return 0 ;
sys_close ( fd2 ) ;
sys_close ( fd1 ) ;
return err ;
out_release_both :
2006-09-01 11:19:31 +04:00
sock_release ( sock2 ) ;
2005-04-17 02:20:36 +04:00
out_release_1 :
2006-09-01 11:19:31 +04:00
sock_release ( sock1 ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
}
/*
* Bind a name to a socket . Nothing much to do here since it ' s
* the protocol ' s responsibility to handle the local address .
*
* We move the socket address to kernel space before we call
* the protocol layer ( having also checked the address is ok ) .
*/
2009-01-14 16:14:23 +03:00
/*
 * bind(2) system call entry point.
 *
 * fd:      descriptor looked up as a socket via sockfd_lookup_light().
 * umyaddr: user-space pointer to the address to bind to.
 * addrlen: length of that address as supplied by the caller.
 *
 * Returns err, which holds the result of the first step that fails, or
 * otherwise whatever the protocol's ->bind() operation returned.
 */
SYSCALL_DEFINE3 ( bind , int , fd , struct sockaddr __user * , umyaddr , int , addrlen )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
2008-07-20 09:35:47 +04:00
/* Kernel-side buffer that receives the copy of the user address. */
struct sockaddr_storage address ;
2006-03-21 09:27:12 +03:00
int err , fput_needed ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
/*
 * Resolve fd to its socket. On failure sock is NULL and err is returned
 * unchanged below (presumably filled in by the lookup through &err --
 * TODO confirm against sockfd_lookup_light()).
 */
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
2007-04-11 07:10:33 +04:00
if ( sock ) {
2008-07-20 09:35:47 +04:00
/* Bring the user-supplied address into the kernel buffer first. */
err = move_addr_to_kernel ( umyaddr , addrlen , ( struct sockaddr * ) & address ) ;
2006-09-01 11:19:31 +04:00
if ( err > = 0 ) {
/* The security hook sees the kernel copy and can veto the bind. */
err = security_socket_bind ( sock ,
2008-07-20 09:35:47 +04:00
( struct sockaddr * ) & address ,
2006-09-01 11:19:31 +04:00
addrlen ) ;
2006-03-21 09:27:12 +03:00
/* Only a zero result from the hook lets the protocol handler run. */
if ( ! err )
err = sock - > ops - > bind ( sock ,
2006-09-01 11:19:31 +04:00
( struct sockaddr * )
2008-07-20 09:35:47 +04:00
& address , addrlen ) ;
2005-04-17 02:20:36 +04:00
}
2006-03-21 09:27:12 +03:00
/* Pairs with sockfd_lookup_light(); fput_needed comes from that call. */
fput_light ( sock - > file , fput_needed ) ;
2006-09-01 11:19:31 +04:00
}
2005-04-17 02:20:36 +04:00
return err ;
}
/*
* Perform a listen . Basically , we allow the protocol to do anything
* necessary for a listen , and if that works , we mark the socket as
* ready for listening .
*/
2009-01-14 16:14:24 +03:00
/*
 * listen(2) system call entry point.
 *
 * fd:      descriptor looked up as a socket via sockfd_lookup_light().
 * backlog: requested backlog; capped at sysctl_somaxconn before it is
 *          handed to the security hook and the protocol.
 *
 * Returns err: the lookup failure, a non-zero result from
 * security_socket_listen(), or whatever the protocol's ->listen() returned.
 */
SYSCALL_DEFINE2 ( listen , int , fd , int , backlog )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
2006-03-21 09:27:12 +03:00
int err , fput_needed ;
2007-12-08 11:12:33 +03:00
int somaxconn ;
2006-09-01 11:19:31 +04:00
/*
 * On lookup failure sock is NULL and err is returned unchanged below
 * (presumably filled in by the lookup through &err -- TODO confirm).
 */
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
if ( sock ) {
2008-04-01 06:41:14 +04:00
/* Limit is read from the net that sock_net() reports for this socket. */
somaxconn = sock_net ( sock - > sk ) - > core . sysctl_somaxconn ;
2007-12-08 11:12:33 +03:00
/*
 * The unsigned cast makes a negative backlog compare as a very large
 * value, so it is clamped to somaxconn just like an oversized request.
 */
if ( ( unsigned ) backlog > somaxconn )
backlog = somaxconn ;
2005-04-17 02:20:36 +04:00
/* The security hook and the protocol both see the clamped backlog. */
err = security_socket_listen ( sock , backlog ) ;
2006-03-21 09:27:12 +03:00
if ( ! err )
err = sock - > ops - > listen ( sock , backlog ) ;
2005-04-17 02:20:36 +04:00
2006-03-21 09:27:12 +03:00
/* Pairs with sockfd_lookup_light(); fput_needed comes from that call. */
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
}
return err ;
}
/*
* For accept , we attempt to create a new socket , set up the link
* with the client , wake up the client , then return the new
* connected fd . We collect the address of the connector in kernel
* space and move it to user at the very end . This is unclean because
* we open the socket then return an error .
*
* 1003.1 g adds the ability to recvmsg ( ) to query connection pending
* status to recvmsg . We need to add that support in a way that ' s
* clean when we restructure accept also .
*/
2009-01-14 16:14:23 +03:00
SYSCALL_DEFINE4 ( accept4 , int , fd , struct sockaddr __user * , upeer_sockaddr ,
int __user * , upeer_addrlen , int , flags )
2005-04-17 02:20:36 +04:00
{
struct socket * sock , * newsock ;
2006-03-21 04:13:49 +03:00
struct file * newfile ;
2006-03-21 09:27:12 +03:00
int err , len , newfd , fput_needed ;
2008-07-20 09:35:47 +04:00
struct sockaddr_storage address ;
2005-04-17 02:20:36 +04:00
flag parameters: NONBLOCK in socket and socketpair
This patch introduces support for the SOCK_NONBLOCK flag in socket,
socketpair, and paccept. To do this the internal function sock_attach_fd
gets an additional parameter which it uses to set the appropriate flag for
the file descriptor.
Given that in modern, scalable programs almost all socket connections are
non-blocking and the minimal additional cost for the new functionality
I see no reason not to add this code.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_NONBLOCK O_NONBLOCK
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
return NULL;
}
int
main (void)
{
int fd;
fd = socket (PF_INET, SOCK_STREAM, 0);
if (fd == -1)
{
puts ("socket(0) failed");
return 1;
}
int fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
puts ("socket(0) set non-blocking mode");
return 1;
}
close (fd);
fd = socket (PF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0);
if (fd == -1)
{
puts ("socket(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (fd, F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
puts ("socket(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (fd);
int fds[2];
if (socketpair (PF_UNIX, SOCK_STREAM, 0, fds) == -1)
{
puts ("socketpair(0) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if (fl & O_NONBLOCK)
{
printf ("socketpair(0) set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
if (socketpair (PF_UNIX, SOCK_STREAM|SOCK_NONBLOCK, 0, fds) == -1)
{
puts ("socketpair(SOCK_NONBLOCK) failed");
return 1;
}
for (int i = 0; i < 2; ++i)
{
fl = fcntl (fds[i], F_GETFL);
if (fl == -1)
{
puts ("fcntl failed");
return 1;
}
if ((fl & O_NONBLOCK) == 0)
{
printf ("socketpair(SOCK_NONBLOCK) does not set non-blocking mode for fds[%d]\n", i);
return 1;
}
close (fds[i]);
}
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, NULL) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if (fl & O_NONBLOCK)
{
puts ("paccept(0) set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_NONBLOCK);
if (s2 < 0)
{
puts ("paccept(SOCK_NONBLOCK) failed");
return 1;
}
fl = fcntl (s2, F_GETFL);
if ((fl & O_NONBLOCK) == 0)
{
puts ("paccept(SOCK_NONBLOCK) does not set non-blocking mode");
return 1;
}
close (s2);
close (s);
pthread_barrier_wait (&b);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:35 +04:00
if ( flags & ~ ( SOCK_CLOEXEC | SOCK_NONBLOCK ) )
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
static void
handler (int s)
{
}
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
return - EINVAL ;
if ( SOCK_NONBLOCK ! = O_NONBLOCK & & ( flags & SOCK_NONBLOCK ) )
flags = ( flags & ~ SOCK_NONBLOCK ) | O_NONBLOCK ;
2006-03-21 09:27:12 +03:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
2005-04-17 02:20:36 +04:00
if ( ! sock )
goto out ;
err = - ENFILE ;
2006-09-01 11:19:31 +04:00
if ( ! ( newsock = sock_alloc ( ) ) )
2005-04-17 02:20:36 +04:00
goto out_put ;
newsock - > type = sock - > type ;
newsock - > ops = sock - > ops ;
/*
* We don ' t need try_module_get here , as the listening socket ( sock )
* has the protocol module ( sock - > ops - > owner ) held .
*/
__module_get ( newsock - > ops - > owner ) ;
2009-08-05 19:59:08 +04:00
newfd = sock_alloc_file ( newsock , & newfile , flags ) ;
2006-03-21 04:13:49 +03:00
if ( unlikely ( newfd < 0 ) ) {
err = newfd ;
2006-04-02 00:48:36 +04:00
sock_release ( newsock ) ;
goto out_put ;
2006-03-21 04:13:49 +03:00
}
2005-09-28 02:23:38 +04:00
err = security_socket_accept ( sock , newsock ) ;
if ( err )
2006-03-21 04:13:49 +03:00
goto out_fd ;
2005-09-28 02:23:38 +04:00
2005-04-17 02:20:36 +04:00
err = sock - > ops - > accept ( sock , newsock , sock - > file - > f_flags ) ;
if ( err < 0 )
2006-03-21 04:13:49 +03:00
goto out_fd ;
2005-04-17 02:20:36 +04:00
if ( upeer_sockaddr ) {
2008-07-20 09:35:47 +04:00
if ( newsock - > ops - > getname ( newsock , ( struct sockaddr * ) & address ,
2006-09-01 11:19:31 +04:00
& len , 2 ) < 0 ) {
2005-04-17 02:20:36 +04:00
err = - ECONNABORTED ;
2006-03-21 04:13:49 +03:00
goto out_fd ;
2005-04-17 02:20:36 +04:00
}
2008-07-20 09:35:47 +04:00
err = move_addr_to_user ( ( struct sockaddr * ) & address ,
len , upeer_sockaddr , upeer_addrlen ) ;
2005-04-17 02:20:36 +04:00
if ( err < 0 )
2006-03-21 04:13:49 +03:00
goto out_fd ;
2005-04-17 02:20:36 +04:00
}
/* File flags are not inherited via accept(), unlike on other OSes. */
2006-03-21 04:13:49 +03:00
fd_install ( newfd , newfile ) ;
err = newfd ;
2005-04-17 02:20:36 +04:00
out_put :
2006-03-21 09:27:12 +03:00
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
2006-03-21 04:13:49 +03:00
out_fd :
2006-04-01 13:00:14 +04:00
fput ( newfile ) ;
2006-03-21 04:13:49 +03:00
put_unused_fd ( newfd ) ;
2005-04-17 02:20:36 +04:00
goto out_put ;
}
2009-01-14 16:14:23 +03:00
SYSCALL_DEFINE3 ( accept , int , fd , struct sockaddr __user * , upeer_sockaddr ,
int __user * , upeer_addrlen )
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
static void
handler (int s)
{
}
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
{
reintroduce accept4
Introduce a new accept4() system call. The addition of this system call
matches analogous changes in 2.6.27 (dup3(), eventfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.
The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument. Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)
SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4(). This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec(). More details here:
http://udrepper.livejournal.com/20407.html ("Secure File Descriptor Handling",
Ulrich Drepper).
The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls to
fcntl(F_GETFL) and fcntl(F_SETFL) to achieve the same result.)
Here's a test program. Works on x86-32. Should work on x86-64, but
I (mtk) don't have a system to hand to test with.
It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().
I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.
/* test_accept4.c
Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<mtk.manpages@gmail.com>
Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#define PORT_NUM 33333
#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
/**********************************************************************/
/* The following is what we need until glibc gets a wrapper for
accept4() */
/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif
#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif
static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");
#if USE_SOCKETCALL
long args[6];
args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;
return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}
/**********************************************************************/
static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;
printf("=======================================\n");
connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");
addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}
fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");
flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");
close(acceptfd);
close(connfd);
printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}
static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;
memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);
lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");
optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");
if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");
if (listen(lfd, 5) == -1)
die("listen");
return lfd;
}
int
main(int argc, char *argv[])
{
struct sockaddr_in conn_addr;
int lfd;
int port_num;
int passed;
passed = 1;
port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;
memset(&conn_addr, 0, sizeof(struct sockaddr_in));
conn_addr.sin_family = AF_INET;
conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
conn_addr.sin_port = htons(port_num);
lfd = create_listening_socket(port_num);
if (!do_test(lfd, &conn_addr, 0, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
passed = 0;
close(lfd);
exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-11-20 02:36:14 +03:00
return sys_accept4 ( fd , upeer_sockaddr , upeer_addrlen , 0 ) ;
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
static void
handler (int s)
{
}
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
}
2005-04-17 02:20:36 +04:00
/*
* Attempt to connect to a socket with the server address . The address
* is in user space so we verify it is OK and move it to kernel space .
*
* For 1003.1 g we need to add clean support for a bind to AF_UNSPEC to
* break bindings
*
* NOTE : 1003.1 g draft 6.3 is broken with respect to AX .25 / NetROM and
* other SEQPACKET protocols that take time to connect ( ) as it doesn ' t
* include the - EINPROGRESS status for such sockets .
*/
2009-01-14 16:14:23 +03:00
/*
 * connect() system call entry point.
 *
 * fd:        descriptor that is resolved to a socket
 * uservaddr: user-space pointer to the destination address
 * addrlen:   length handed to move_addr_to_kernel(), to the security hook
 *            and to the protocol's connect operation
 *
 * Returns the result of sock->ops->connect() when every earlier step
 * succeeded; otherwise the error of the first step that failed.  When the
 * descriptor lookup fails, err is returned without being assigned in this
 * function, so it is presumably filled in by sockfd_lookup_light() through
 * the &err argument -- confirm against that helper.
 */
SYSCALL_DEFINE3 ( connect , int , fd , struct sockaddr __user * , uservaddr ,
int , addrlen )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
2008-07-20 09:35:47 +04:00
/* Kernel-side copy of the caller's address. */
struct sockaddr_storage address ;
2006-03-21 09:27:12 +03:00
int err , fput_needed ;
2005-04-17 02:20:36 +04:00
2006-03-21 09:27:12 +03:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
2005-04-17 02:20:36 +04:00
if ( ! sock )
goto out ;
2008-07-20 09:35:47 +04:00
/* Copy the user-supplied address into 'address'; a negative result aborts. */
err = move_addr_to_kernel ( uservaddr , addrlen , ( struct sockaddr * ) & address ) ;
2005-04-17 02:20:36 +04:00
if ( err < 0 )
goto out_put ;
2006-09-01 11:19:31 +04:00
/* Security hook: any non-zero result vetoes the connect. */
err =
2008-07-20 09:35:47 +04:00
security_socket_connect ( sock , ( struct sockaddr * ) & address , addrlen ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out_put ;
2008-07-20 09:35:47 +04:00
/* Hand over to the protocol, passing the file's f_flags along. */
err = sock - > ops - > connect ( sock , ( struct sockaddr * ) & address , addrlen ,
2005-04-17 02:20:36 +04:00
sock - > file - > f_flags ) ;
out_put :
2006-03-21 09:27:12 +03:00
/* Pairs with sockfd_lookup_light() above; fput_needed was set by that call. */
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
}
/*
* Get the local address ( ' name ' ) of a socket object . Move the obtained
* name to user space .
*/
2009-01-14 16:14:23 +03:00
/*
 * getsockname() system call entry point.
 *
 * fd:            descriptor that is resolved to a socket
 * usockaddr:     user-space buffer that receives the address
 * usockaddr_len: user-space length pointer passed to move_addr_to_user()
 *
 * Returns the result of move_addr_to_user() when the lookup, the security
 * hook and the protocol's getname operation all succeeded; otherwise the
 * error of the first step that failed.
 */
SYSCALL_DEFINE3 ( getsockname , int , fd , struct sockaddr __user * , usockaddr ,
int __user * , usockaddr_len )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
2008-07-20 09:35:47 +04:00
/* Kernel-side buffer the protocol writes the address into. */
struct sockaddr_storage address ;
2006-03-21 09:27:12 +03:00
int len , err , fput_needed ;
2006-09-01 11:19:31 +04:00
2006-03-21 09:27:12 +03:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
2005-04-17 02:20:36 +04:00
if ( ! sock )
goto out ;
/* Security hook: any non-zero result vetoes the query. */
err = security_socket_getsockname ( sock ) ;
if ( err )
goto out_put ;
2008-07-20 09:35:47 +04:00
/*
 * Final argument is 0 here, 1 in getpeername; presumably it selects the
 * local versus the peer address -- confirm against the getname contract.
 */
err = sock - > ops - > getname ( sock , ( struct sockaddr * ) & address , & len , 0 ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out_put ;
2008-07-20 09:35:47 +04:00
/* Copy the address and its length (len, set by getname) out to user space. */
err = move_addr_to_user ( ( struct sockaddr * ) & address , len , usockaddr , usockaddr_len ) ;
2005-04-17 02:20:36 +04:00
out_put :
2006-03-21 09:27:12 +03:00
/* Pairs with sockfd_lookup_light() above. */
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
out :
return err ;
}
/*
* Get the remote address ( ' name ' ) of a socket object . Move the obtained
* name to user space .
*/
2009-01-14 16:14:23 +03:00
/*
 * getpeername() system call entry point.
 *
 * fd:            descriptor that is resolved to a socket
 * usockaddr:     user-space buffer that receives the address
 * usockaddr_len: user-space length pointer passed to move_addr_to_user()
 *
 * Returns the result of move_addr_to_user() when the lookup, the security
 * hook and the protocol's getname operation all succeeded; otherwise the
 * error of the first step that failed.  Unlike getsockname, this function
 * uses nested ifs and an early return instead of goto labels.
 */
SYSCALL_DEFINE3 ( getpeername , int , fd , struct sockaddr __user * , usockaddr ,
int __user * , usockaddr_len )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
2008-07-20 09:35:47 +04:00
/* Kernel-side buffer the protocol writes the address into. */
struct sockaddr_storage address ;
2006-03-21 09:27:12 +03:00
int len , err , fput_needed ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
if ( sock ! = NULL ) {
2005-04-17 02:20:36 +04:00
err = security_socket_getpeername ( sock ) ;
if ( err ) {
2006-03-21 09:27:12 +03:00
/* Security hook refused: release the file before the early return. */
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
return err ;
}
2006-09-01 11:19:31 +04:00
/*
 * Final argument is 1 here, 0 in getsockname; presumably it selects the
 * peer versus the local address -- confirm against the getname contract.
 */
err =
2008-07-20 09:35:47 +04:00
sock - > ops - > getname ( sock , ( struct sockaddr * ) & address , & len ,
2006-09-01 11:19:31 +04:00
1 ) ;
2005-04-17 02:20:36 +04:00
/* Only copy out to user space when getname reported success (0). */
if ( ! err )
2008-07-20 09:35:47 +04:00
err = move_addr_to_user ( ( struct sockaddr * ) & address , len , usockaddr ,
2006-09-01 11:19:31 +04:00
usockaddr_len ) ;
2006-03-21 09:27:12 +03:00
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
}
/* With sock == NULL this is the err passed by pointer to the lookup. */
return err ;
}
/*
* Send a datagram to a given address . We move the address into kernel
* space and check the user space data area is readable before invoking
* the protocol .
*/
2009-01-14 16:14:24 +03:00
/*
 * sendto() system call entry point.
 *
 * fd:       descriptor that is resolved to a socket
 * buff:     user-space data buffer, used as the single iovec element
 * len:      number of bytes in buff
 * flags:    message flags; MSG_DONTWAIT is OR-ed in for O_NONBLOCK files
 * addr:     optional user-space destination address (may be NULL)
 * addr_len: length of addr, only used when addr is non-NULL
 *
 * Returns the result of sock_sendmsg(), or the error from the descriptor
 * lookup or from move_addr_to_kernel().
 *
 * NOTE(review): len is a size_t that is forwarded unclamped while the
 * result is stored in an int -- confirm that lengths above INT_MAX are
 * handled further down.
 */
SYSCALL_DEFINE6 ( sendto , int , fd , void __user * , buff , size_t , len ,
unsigned , flags , struct sockaddr __user * , addr ,
int , addr_len )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
2008-07-20 09:35:47 +04:00
/* Kernel-side copy of the destination address, filled only if addr != NULL. */
struct sockaddr_storage address ;
2005-04-17 02:20:36 +04:00
int err ;
struct msghdr msg ;
struct iovec iov ;
2006-03-21 09:27:12 +03:00
int fput_needed ;
2007-11-15 03:01:43 +03:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
if ( ! sock )
2007-02-09 02:06:08 +03:00
goto out ;
2006-03-21 09:27:12 +03:00
2006-09-01 11:19:31 +04:00
/* Describe the user buffer as a one-element iovec with no control data. */
iov . iov_base = buff ;
iov . iov_len = len ;
msg . msg_name = NULL ;
msg . msg_iov = & iov ;
msg . msg_iovlen = 1 ;
msg . msg_control = NULL ;
msg . msg_controllen = 0 ;
msg . msg_namelen = 0 ;
2006-03-21 09:27:12 +03:00
/* A destination is optional; without one msg_name stays NULL / length 0. */
if ( addr ) {
2008-07-20 09:35:47 +04:00
err = move_addr_to_kernel ( addr , addr_len , ( struct sockaddr * ) & address ) ;
2005-04-17 02:20:36 +04:00
if ( err < 0 )
goto out_put ;
2008-07-20 09:35:47 +04:00
msg . msg_name = ( struct sockaddr * ) & address ;
2006-09-01 11:19:31 +04:00
msg . msg_namelen = addr_len ;
2005-04-17 02:20:36 +04:00
}
/* A file opened with O_NONBLOCK forces MSG_DONTWAIT for this call. */
if ( sock - > file - > f_flags & O_NONBLOCK )
flags | = MSG_DONTWAIT ;
msg . msg_flags = flags ;
err = sock_sendmsg ( sock , & msg , len ) ;
2006-09-01 11:19:31 +04:00
out_put :
2007-11-15 03:01:43 +03:00
/* Pairs with sockfd_lookup_light() above. */
fput_light ( sock - > file , fput_needed ) ;
2007-02-09 02:06:08 +03:00
out :
2005-04-17 02:20:36 +04:00
return err ;
}
/*
2006-09-01 11:19:31 +04:00
* Send a datagram down a socket .
2005-04-17 02:20:36 +04:00
*/
2009-01-14 16:14:24 +03:00
/*
 * send() system call entry point: a thin wrapper that forwards to
 * sys_sendto() with a NULL destination address and an address length of 0.
 * Returns whatever sys_sendto() returns.
 */
SYSCALL_DEFINE4 ( send , int , fd , void __user * , buff , size_t , len ,
unsigned , flags )
2005-04-17 02:20:36 +04:00
{
return sys_sendto ( fd , buff , len , flags , NULL , 0 ) ;
}
/*
2006-09-01 11:19:31 +04:00
* Receive a frame from the socket and optionally record the address of the
2005-04-17 02:20:36 +04:00
* sender . We verify the buffers are writable and if needed move the
* sender address from kernel to user space .
*/
2009-01-14 16:14:24 +03:00
/*
 * recvfrom() system call entry point.
 *
 * fd:       descriptor that is resolved to a socket
 * ubuf:     user-space receive buffer, used as the single iovec element
 * size:     capacity of ubuf
 * flags:    message flags; MSG_DONTWAIT is OR-ed in for O_NONBLOCK files
 * addr:     optional user-space buffer for the sender address (may be NULL)
 * addr_len: user-space length pointer passed to move_addr_to_user()
 *
 * Returns the result of sock_recvmsg(); if that is non-negative, addr is
 * non-NULL and copying the address out fails, the copy error is returned
 * instead.
 *
 * NOTE(review): size is a size_t that is forwarded unclamped while the
 * result is stored in an int -- confirm that sizes above INT_MAX are
 * handled further down.
 */
SYSCALL_DEFINE6 ( recvfrom , int , fd , void __user * , ubuf , size_t , size ,
unsigned , flags , struct sockaddr __user * , addr ,
int __user * , addr_len )
2005-04-17 02:20:36 +04:00
{
struct socket * sock ;
struct iovec iov ;
struct msghdr msg ;
2008-07-20 09:35:47 +04:00
/* Kernel-side buffer for the sender address; always supplied to recvmsg. */
struct sockaddr_storage address ;
2006-09-01 11:19:31 +04:00
/* err2 keeps the address copy-out result apart from the receive result. */
int err , err2 ;
2006-03-21 09:27:12 +03:00
int fput_needed ;
2007-11-15 03:01:43 +03:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
2005-04-17 02:20:36 +04:00
if ( ! sock )
2007-11-15 03:01:43 +03:00
goto out ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
/* Describe the user buffer as a one-element iovec with no control data. */
msg . msg_control = NULL ;
msg . msg_controllen = 0 ;
msg . msg_iovlen = 1 ;
msg . msg_iov = & iov ;
iov . iov_len = size ;
iov . iov_base = ubuf ;
2008-07-20 09:35:47 +04:00
msg . msg_name = ( struct sockaddr * ) & address ;
msg . msg_namelen = sizeof ( address ) ;
2005-04-17 02:20:36 +04:00
/* A file opened with O_NONBLOCK forces MSG_DONTWAIT for this call. */
if ( sock - > file - > f_flags & O_NONBLOCK )
flags | = MSG_DONTWAIT ;
2006-09-01 11:19:31 +04:00
err = sock_recvmsg ( sock , & msg , size , flags ) ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:19:31 +04:00
/* Report the sender only on a successful receive and if the caller asked. */
if ( err > = 0 & & addr ! = NULL ) {
2008-07-20 09:35:47 +04:00
err2 = move_addr_to_user ( ( struct sockaddr * ) & address ,
msg . msg_namelen , addr , addr_len ) ;
2006-09-01 11:19:31 +04:00
/* A failed copy-out overrides the receive result. */
if ( err2 < 0 )
err = err2 ;
2005-04-17 02:20:36 +04:00
}
2007-11-15 03:01:43 +03:00
/* Pairs with sockfd_lookup_light() above. */
fput_light ( sock - > file , fput_needed ) ;
2007-02-09 02:06:08 +03:00
out :
2005-04-17 02:20:36 +04:00
return err ;
}
/*
2006-09-01 11:19:31 +04:00
* Receive a datagram from a socket .
2005-04-17 02:20:36 +04:00
*/
2006-09-01 11:19:31 +04:00
/*
 * recv() system call entry point: a thin wrapper that forwards to
 * sys_recvfrom() with NULL for both the sender address buffer and its
 * length pointer.  Returns whatever sys_recvfrom() returns.
 */
asmlinkage long sys_recv ( int fd , void __user * ubuf , size_t size ,
unsigned flags )
2005-04-17 02:20:36 +04:00
{
return sys_recvfrom ( fd , ubuf , size , flags , NULL , NULL ) ;
}
/*
* Set a socket option . Because we don ' t know the option lengths we have
* to pass the user mode parameter for the protocols to sort out .
*/
2009-01-14 16:14:23 +03:00
/*
 * setsockopt() system call entry point.
 *
 * fd:      descriptor that is resolved to a socket
 * level:   option level; SOL_SOCKET is handled by sock_setsockopt(), every
 *          other level by the protocol's setsockopt operation
 * optname: option identifier, passed through unchanged
 * optval:  user-space pointer to the option value, passed through unchanged
 * optlen:  length of optval; negative values are rejected with -EINVAL
 *          before the descriptor is looked up
 *
 * Returns -EINVAL for a negative optlen, the security hook's error if it
 * refuses, otherwise the result of the selected setsockopt handler.  When
 * the lookup fails, the err passed by pointer to sockfd_lookup_light() is
 * returned.
 */
SYSCALL_DEFINE5 ( setsockopt , int , fd , int , level , int , optname ,
char __user * , optval , int , optlen )
2005-04-17 02:20:36 +04:00
{
2006-03-21 09:27:12 +03:00
int err , fput_needed ;
2005-04-17 02:20:36 +04:00
struct socket * sock ;
if ( optlen < 0 )
return - EINVAL ;
2006-09-01 11:19:31 +04:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
if ( sock ! = NULL ) {
/* Security hook: any non-zero result vetoes the change. */
err = security_socket_setsockopt ( sock , level , optname ) ;
2006-03-21 09:27:12 +03:00
if ( err )
goto out_put ;
2005-04-17 02:20:36 +04:00
/* Socket-level options go to the generic handler, the rest to the protocol. */
if ( level = = SOL_SOCKET )
2006-09-01 11:19:31 +04:00
err =
sock_setsockopt ( sock , level , optname , optval ,
optlen ) ;
2005-04-17 02:20:36 +04:00
else
2006-09-01 11:19:31 +04:00
err =
sock - > ops - > setsockopt ( sock , level , optname , optval ,
optlen ) ;
2006-03-21 09:27:12 +03:00
out_put :
/* Pairs with sockfd_lookup_light() above. */
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
}
return err ;
}
/*
* Get a socket option . Because we don ' t know the option lengths we have
* to pass a user mode parameter for the protocols to sort out .
*/
2009-01-14 16:14:23 +03:00
/*
 * getsockopt() system call entry point.
 *
 * fd:      descriptor that is resolved to a socket
 * level:   option level; SOL_SOCKET is handled by sock_getsockopt(), every
 *          other level by the protocol's getsockopt operation
 * optname: option identifier, passed through unchanged
 * optval:  user-space buffer for the option value, passed through unchanged
 * optlen:  user-space pointer to the value length, passed through unchanged
 *
 * Returns the security hook's error if it refuses, otherwise the result of
 * the selected getsockopt handler.  When the lookup fails, the err passed
 * by pointer to sockfd_lookup_light() is returned.
 */
SYSCALL_DEFINE5 ( getsockopt , int , fd , int , level , int , optname ,
char __user * , optval , int __user * , optlen )
2005-04-17 02:20:36 +04:00
{
2006-03-21 09:27:12 +03:00
int err , fput_needed ;
2005-04-17 02:20:36 +04:00
struct socket * sock ;
2006-09-01 11:19:31 +04:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
if ( sock ! = NULL ) {
2006-03-21 09:27:12 +03:00
/* Security hook: any non-zero result vetoes the query. */
err = security_socket_getsockopt ( sock , level , optname ) ;
if ( err )
goto out_put ;
2005-04-17 02:20:36 +04:00
/* Socket-level options go to the generic handler, the rest to the protocol. */
if ( level = = SOL_SOCKET )
2006-09-01 11:19:31 +04:00
err =
sock_getsockopt ( sock , level , optname , optval ,
optlen ) ;
2005-04-17 02:20:36 +04:00
else
2006-09-01 11:19:31 +04:00
err =
sock - > ops - > getsockopt ( sock , level , optname , optval ,
optlen ) ;
2006-03-21 09:27:12 +03:00
out_put :
/* Pairs with sockfd_lookup_light() above. */
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
}
return err ;
}
/*
* Shutdown a socket .
*/
2009-01-14 16:14:09 +03:00
/*
 * shutdown() system call entry point.
 *
 * fd:  descriptor that is resolved to a socket
 * how: shutdown mode, passed unchanged to the security hook and to the
 *      protocol's shutdown operation
 *
 * Returns the security hook's error if it refuses, otherwise the result of
 * sock->ops->shutdown().  When the lookup fails, the err passed by pointer
 * to sockfd_lookup_light() is returned.
 */
SYSCALL_DEFINE2 ( shutdown , int , fd , int , how )
2005-04-17 02:20:36 +04:00
{
2006-03-21 09:27:12 +03:00
int err , fput_needed ;
2005-04-17 02:20:36 +04:00
struct socket * sock ;
2006-09-01 11:19:31 +04:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
if ( sock ! = NULL ) {
2005-04-17 02:20:36 +04:00
err = security_socket_shutdown ( sock , how ) ;
2006-03-21 09:27:12 +03:00
/* Call into the protocol only when the security hook returned 0. */
if ( ! err )
err = sock - > ops - > shutdown ( sock , how ) ;
/* Pairs with sockfd_lookup_light() above. */
fput_light ( sock - > file , fput_needed ) ;
2005-04-17 02:20:36 +04:00
}
return err ;
}
2006-09-01 11:19:31 +04:00
/* A couple of helpful macros for getting the address of the 32/64 bit
2005-04-17 02:20:36 +04:00
* fields which are the same type ( int / unsigned ) on our platforms .
*/
/*
 * COMPAT_MSG(msg, member) yields the address of 'member' in either the
 * compat or the native message header: when MSG_CMSG_COMPAT is set in
 * 'flags' it uses <msg>_compat->member, otherwise msg->member.
 *
 * The macro is not self-contained: it expects a variable named 'flags' and
 * a pointer named <msg>_compat (token-pasted from the first argument) to be
 * in scope at the point of use.
 */
# define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
/* Address of the msg_namelen field, picked via COMPAT_MSG. */
# define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen)
/* Address of the msg_flags field, picked via COMPAT_MSG. */
# define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags)
/*
* BSD sendmsg interface
*
* Copies the caller ' s msghdr into msg_sys ( using the compat layout when
* MSG_CMSG_COMPAT is set in flags ) , looks up the socket for fd , brings the
* iovec array and any control data into kernel buffers and hands the
* result to sock_sendmsg ( ) .
*
* Small requests use on - stack storage : iovstack for up to UIO_FASTIOV
* iovec entries and ctl for control data that fits in sizeof ( ctl ) .
* Anything larger is obtained with sock_kmalloc ( ) and released on the way
* out through the out_freectl / out_freeiov labels .
*
* Returns the value of sock_sendmsg ( ) , or a negative errno : - EFAULT for
* failed user copies , - EMSGSIZE when msg_iovlen exceeds UIO_MAXIOV ,
* - ENOMEM when the iovec allocation fails , - ENOBUFS when msg_controllen
* exceeds INT_MAX or the control buffer allocation fails , or whatever the
* socket lookup / iovec verification reported .
*/
2009-01-14 16:14:24 +03:00
SYSCALL_DEFINE3 ( sendmsg , int , fd , struct msghdr __user * , msg , unsigned , flags )
2005-04-17 02:20:36 +04:00
{
2006-09-01 11:19:31 +04:00
struct compat_msghdr __user * msg_compat =
( struct compat_msghdr __user * ) msg ;
2005-04-17 02:20:36 +04:00
struct socket * sock ;
2008-07-20 09:35:47 +04:00
struct sockaddr_storage address ;
2005-04-17 02:20:36 +04:00
struct iovec iovstack [ UIO_FASTIOV ] , * iov = iovstack ;
2005-09-27 01:28:02 +04:00
unsigned char ctl [ sizeof ( struct cmsghdr ) + 20 ]
2006-09-01 11:19:31 +04:00
__attribute__ ( ( aligned ( sizeof ( __kernel_size_t ) ) ) ) ;
/* 20 is size of ipv6_pktinfo */
2005-04-17 02:20:36 +04:00
unsigned char * ctl_buf = ctl ;
struct msghdr msg_sys ;
int err , ctl_len , iov_size , total_len ;
2006-03-21 09:27:12 +03:00
int fput_needed ;
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
err = - EFAULT ;
/* Fetch the user ' s header into msg_sys ; nothing to clean up yet , so return directly . */
if ( MSG_CMSG_COMPAT & flags ) {
if ( get_compat_msghdr ( & msg_sys , msg_compat ) )
return - EFAULT ;
2006-09-01 11:19:31 +04:00
}
else if ( copy_from_user ( & msg_sys , msg , sizeof ( struct msghdr ) ) )
2005-04-17 02:20:36 +04:00
return - EFAULT ;
2006-03-21 09:27:12 +03:00
sock = sockfd_lookup_light ( fd , & err , & fput_needed ) ;
2006-09-01 11:19:31 +04:00
if ( ! sock )
2005-04-17 02:20:36 +04:00
goto out ;
/* do not move before msg_sys is valid */
err = - EMSGSIZE ;
if ( msg_sys . msg_iovlen > UIO_MAXIOV )
goto out_put ;
2006-09-01 11:19:31 +04:00
/* Check whether to allocate the iovec area */
2005-04-17 02:20:36 +04:00
err = - ENOMEM ;
iov_size = msg_sys . msg_iovlen * sizeof ( struct iovec ) ;
/* iov keeps pointing at iovstack unless more than UIO_FASTIOV entries are needed . */
if ( msg_sys . msg_iovlen > UIO_FASTIOV ) {
iov = sock_kmalloc ( sock - > sk , iov_size , GFP_KERNEL ) ;
if ( ! iov )
goto out_put ;
}
/* This will also move the address data into kernel space */
if ( MSG_CMSG_COMPAT & flags ) {
2008-07-20 09:35:47 +04:00
err = verify_compat_iovec ( & msg_sys , iov ,
( struct sockaddr * ) & address ,
VERIFY_READ ) ;
2005-04-17 02:20:36 +04:00
} else
2008-07-20 09:35:47 +04:00
err = verify_iovec ( & msg_sys , iov ,
( struct sockaddr * ) & address ,
VERIFY_READ ) ;
2006-09-01 11:19:31 +04:00
if ( err < 0 )
2005-04-17 02:20:36 +04:00
goto out_freeiov ;
/* A non - negative result is kept as total_len and later passed to sock_sendmsg ( ) . */
total_len = err ;
/*
* err stays - ENOBUFS both for an oversized msg_controllen and for a
* failed ctl_buf allocation further down .
*/
err = - ENOBUFS ;
if ( msg_sys . msg_controllen > INT_MAX )
goto out_freeiov ;
2006-09-01 11:19:31 +04:00
ctl_len = msg_sys . msg_controllen ;
2005-04-17 02:20:36 +04:00
if ( ( MSG_CMSG_COMPAT & flags ) & & ctl_len ) {
2006-09-01 11:19:31 +04:00
err =
cmsghdr_from_user_compat_to_kern ( & msg_sys , sock - > sk , ctl ,
sizeof ( ctl ) ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out_freeiov ;
/* Re - read buffer and length from msg_sys after the compat conversion . */
ctl_buf = msg_sys . msg_control ;
2005-09-08 05:28:51 +04:00
ctl_len = msg_sys . msg_controllen ;
2005-04-17 02:20:36 +04:00
} else if ( ctl_len ) {
2006-09-01 11:19:31 +04:00
if ( ctl_len > sizeof ( ctl ) ) {
2005-04-17 02:20:36 +04:00
ctl_buf = sock_kmalloc ( sock - > sk , ctl_len , GFP_KERNEL ) ;
2006-09-01 11:19:31 +04:00
if ( ctl_buf = = NULL )
2005-04-17 02:20:36 +04:00
goto out_freeiov ;
}
err = - EFAULT ;
/*
* Careful ! Before this , msg_sys . msg_control contains a user pointer .
* Afterwards , it will be a kernel pointer . Thus the compiler - assisted
* checking falls down on this .
*/
2006-09-01 11:19:31 +04:00
if ( copy_from_user ( ctl_buf , ( void __user * ) msg_sys . msg_control ,
ctl_len ) )
2005-04-17 02:20:36 +04:00
goto out_freectl ;
msg_sys . msg_control = ctl_buf ;
}
msg_sys . msg_flags = flags ;
/* A non - blocking file makes this send non - blocking as well . */
if ( sock - > file - > f_flags & O_NONBLOCK )
msg_sys . msg_flags | = MSG_DONTWAIT ;
err = sock_sendmsg ( sock , & msg_sys , total_len ) ;
out_freectl :
2006-09-01 11:19:31 +04:00
/* Only free control data that does not live in the on - stack ctl array . */
if ( ctl_buf ! = ctl )
2005-04-17 02:20:36 +04:00
sock_kfree_s ( sock - > sk , ctl_buf , ctl_len ) ;
out_freeiov :
/* Likewise , only a heap - allocated iovec array is freed . */
if ( iov ! = iovstack )
sock_kfree_s ( sock - > sk , iov , iov_size ) ;
out_put :
2006-03-21 09:27:12 +03:00
fput_light ( sock - > file , fput_needed ) ;
2006-09-01 11:19:31 +04:00
out :
2005-04-17 02:20:36 +04:00
return err ;
}
2009-10-13 10:40:10 +04:00
/*
* Common receive path shared by the recvmsg and recvmmsg entry points in
* this file .
*
* sock : socket to receive from ; the caller holds the reference .
* msg : user - space msghdr ( interpreted as a compat_msghdr when
* MSG_CMSG_COMPAT is set in flags ) .
* msg_sys : caller - provided kernel msghdr , filled in here from msg .
* flags : receive flags ; MSG_DONTWAIT is added when the file is
* O_NONBLOCK .
* nosec : non - zero selects sock_recvmsg_nosec ( ) instead of
* sock_recvmsg ( ) .
*
* On success returns the value of the receive call ( saved in len ) after
* writing the source address ( if the caller asked for one ) , msg_flags and
* msg_controllen back to user space . On failure returns a negative errno .
*/
static int __sys_recvmsg ( struct socket * sock , struct msghdr __user * msg ,
struct msghdr * msg_sys , unsigned flags , int nosec )
2005-04-17 02:20:36 +04:00
{
2006-09-01 11:19:31 +04:00
struct compat_msghdr __user * msg_compat =
( struct compat_msghdr __user * ) msg ;
2005-04-17 02:20:36 +04:00
struct iovec iovstack [ UIO_FASTIOV ] ;
2006-09-01 11:19:31 +04:00
struct iovec * iov = iovstack ;
2005-04-17 02:20:36 +04:00
unsigned long cmsg_ptr ;
int err , iov_size , total_len , len ;
/* kernel mode address */
2008-07-20 09:35:47 +04:00
struct sockaddr_storage addr ;
2005-04-17 02:20:36 +04:00
/* user mode address pointers */
struct sockaddr __user * uaddr ;
int __user * uaddr_len ;
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
/* Fetch the user ' s header ; nothing has been allocated yet , so return directly . */
if ( MSG_CMSG_COMPAT & flags ) {
2009-10-13 10:40:10 +04:00
if ( get_compat_msghdr ( msg_sys , msg_compat ) )
2005-04-17 02:20:36 +04:00
return - EFAULT ;
2006-09-01 11:19:31 +04:00
}
2009-10-13 10:40:10 +04:00
else if ( copy_from_user ( msg_sys , msg , sizeof ( struct msghdr ) ) )
2006-09-01 11:19:31 +04:00
return - EFAULT ;
2005-04-17 02:20:36 +04:00
err = - EMSGSIZE ;
2009-10-13 10:40:10 +04:00
if ( msg_sys - > msg_iovlen > UIO_MAXIOV )
goto out ;
2006-09-01 11:19:31 +04:00
/* Check whether to allocate the iovec area */
2005-04-17 02:20:36 +04:00
err = - ENOMEM ;
2009-10-13 10:40:10 +04:00
iov_size = msg_sys - > msg_iovlen * sizeof ( struct iovec ) ;
/* iov keeps pointing at iovstack unless more than UIO_FASTIOV entries are needed . */
if ( msg_sys - > msg_iovlen > UIO_FASTIOV ) {
2005-04-17 02:20:36 +04:00
iov = sock_kmalloc ( sock - > sk , iov_size , GFP_KERNEL ) ;
if ( ! iov )
2009-10-13 10:40:10 +04:00
goto out ;
2005-04-17 02:20:36 +04:00
}
/*
2006-09-01 11:19:31 +04:00
* Save the user - mode address ( verify_iovec will change the
* kernel msghdr to use the kernel address space )
2005-04-17 02:20:36 +04:00
*/
2006-09-01 11:19:31 +04:00
2009-10-13 10:40:10 +04:00
uaddr = ( __force void __user * ) msg_sys - > msg_name ;
2005-04-17 02:20:36 +04:00
/* Address of the msg_namelen field inside the user ' s ( compat or native ) header . */
uaddr_len = COMPAT_NAMELEN ( msg ) ;
if ( MSG_CMSG_COMPAT & flags ) {
2009-10-13 10:40:10 +04:00
err = verify_compat_iovec ( msg_sys , iov ,
2008-07-20 09:35:47 +04:00
( struct sockaddr * ) & addr ,
VERIFY_WRITE ) ;
2005-04-17 02:20:36 +04:00
} else
2009-10-13 10:40:10 +04:00
err = verify_iovec ( msg_sys , iov ,
2008-07-20 09:35:47 +04:00
( struct sockaddr * ) & addr ,
VERIFY_WRITE ) ;
2005-04-17 02:20:36 +04:00
if ( err < 0 )
goto out_freeiov ;
2006-09-01 11:19:31 +04:00
/* A non - negative result is kept as total_len and passed to the receive call . */
total_len = err ;
2005-04-17 02:20:36 +04:00
2009-10-13 10:40:10 +04:00
/*
* Remember where the control buffer started ; the difference between
* msg_control after the receive and this value is what gets reported
* back as msg_controllen below .
*/
cmsg_ptr = ( unsigned long ) msg_sys - > msg_control ;
msg_sys - > msg_flags = flags & ( MSG_CMSG_CLOEXEC | MSG_CMSG_COMPAT ) ;
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
if ( sock - > file - > f_flags & O_NONBLOCK )
flags | = MSG_DONTWAIT ;
2009-10-13 10:40:10 +04:00
err = ( nosec ? sock_recvmsg_nosec : sock_recvmsg ) ( sock , msg_sys ,
total_len , flags ) ;
2005-04-17 02:20:36 +04:00
if ( err < 0 )
goto out_freeiov ;
/* Keep the receive result ; err is reused for the copy - back steps . */
len = err ;
/* Only copy the source address out if the caller supplied msg_name . */
if ( uaddr ! = NULL ) {
2008-07-20 09:35:47 +04:00
err = move_addr_to_user ( ( struct sockaddr * ) & addr ,
2009-10-13 10:40:10 +04:00
msg_sys - > msg_namelen , uaddr ,
2006-09-01 11:19:31 +04:00
uaddr_len ) ;
2005-04-17 02:20:36 +04:00
if ( err < 0 )
goto out_freeiov ;
}
2009-10-13 10:40:10 +04:00
/* MSG_CMSG_COMPAT is masked out of the flags written back to user space . */
err = __put_user ( ( msg_sys - > msg_flags & ~ MSG_CMSG_COMPAT ) ,
2005-09-17 03:51:01 +04:00
COMPAT_FLAGS ( msg ) ) ;
2005-04-17 02:20:36 +04:00
if ( err )
goto out_freeiov ;
if ( MSG_CMSG_COMPAT & flags )
2009-10-13 10:40:10 +04:00
err = __put_user ( ( unsigned long ) msg_sys - > msg_control - cmsg_ptr ,
2005-04-17 02:20:36 +04:00
& msg_compat - > msg_controllen ) ;
else
2009-10-13 10:40:10 +04:00
err = __put_user ( ( unsigned long ) msg_sys - > msg_control - cmsg_ptr ,
2005-04-17 02:20:36 +04:00
& msg - > msg_controllen ) ;
if ( err )
goto out_freeiov ;
err = len ;
out_freeiov :
/* Only a heap - allocated iovec array is freed . */
if ( iov ! = iovstack )
sock_kfree_s ( sock - > sk , iov , iov_size ) ;
2009-10-13 10:40:10 +04:00
out :
return err ;
}
/*
 *	BSD recvmsg interface
 *
 *	Thin syscall wrapper: resolves fd to a socket, runs the shared
 *	__sys_recvmsg() path with nosec == 0 and drops the file reference.
 *	Returns __sys_recvmsg()'s result, or the lookup error when fd does not
 *	resolve to a socket.
 */
SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
		unsigned int, flags)
{
	struct msghdr kmsg;
	struct socket *sock;
	int fput_needed;
	int ret;

	sock = sockfd_lookup_light(fd, &ret, &fput_needed);
	if (sock) {
		ret = __sys_recvmsg(sock, msg, &kmsg, flags, 0);
		fput_light(sock->file, fput_needed);
	}

	return ret;
}
2009-10-13 10:40:10 +04:00
/*
 *	Linux recvmmsg interface
 *
 *	Receives up to vlen datagrams from the socket behind fd, one
 *	__sys_recvmsg() call per entry of the user array mmsg, and stores each
 *	datagram's length in that entry's msg_len.
 *
 *	fd:	 file descriptor of the socket.
 *	mmsg:	 user array of mmsghdr (compat_mmsghdr when MSG_CMSG_COMPAT is
 *		 set in flags).
 *	vlen:	 number of entries in mmsg.
 *	flags:	 receive flags; with MSG_WAITFORONE, MSG_DONTWAIT is turned on
 *		 once the first datagram has been received.
 *	timeout: optional kernel-space timespec.  It is rewritten after every
 *		 datagram with the time left until the deadline and the loop
 *		 stops once nothing is left.
 *
 *	Returns the number of datagrams received.  If none were received the
 *	error is returned instead.  If an error other than -EAGAIN shows up
 *	after at least one datagram, it is recorded in sk_err so it is reported
 *	later, and the count is returned.
 */
int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
		   unsigned int flags, struct timespec *timeout)
{
	int fput_needed, err, datagrams;
	struct socket *sock;
	struct mmsghdr __user *entry;
	struct compat_mmsghdr __user *compat_entry;
	struct msghdr msg_sys;
	struct timespec end_time;

	if (timeout &&
	    poll_select_set_timeout(&end_time, timeout->tv_sec,
				    timeout->tv_nsec))
		return -EINVAL;

	datagrams = 0;

	sock = sockfd_lookup_light(fd, &err, &fput_needed);
	if (!sock)
		return err;

	err = sock_error(sock->sk);
	if (err)
		goto out_put;

	entry = mmsg;
	compat_entry = (struct compat_mmsghdr __user *)mmsg;

	while (datagrams < vlen) {
		/*
		 * No need to ask LSM for more than the first datagram.
		 */
		if (MSG_CMSG_COMPAT & flags) {
			err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
					    &msg_sys, flags, datagrams);
			if (err < 0)
				break;
			err = __put_user(err, &compat_entry->msg_len);
			++compat_entry;
		} else {
			err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
					    &msg_sys, flags, datagrams);
			if (err < 0)
				break;
			err = put_user(err, &entry->msg_len);
			++entry;
		}

		if (err)
			break;
		++datagrams;

		/* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
		if (flags & MSG_WAITFORONE)
			flags |= MSG_DONTWAIT;

		if (timeout) {
			ktime_get_ts(timeout);
			*timeout = timespec_sub(end_time, *timeout);
			if (timeout->tv_sec < 0) {
				timeout->tv_sec = timeout->tv_nsec = 0;
				break;
			}

			/* Timeout, return less than vlen datagrams */
			if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
				break;
		}

		/* Out of band data, return right away */
		if (msg_sys.msg_flags & MSG_OOB)
			break;
	}

out_put:
	/*
	 * Work out the return value and record any pending error BEFORE
	 * dropping the file reference: once fput_light() has run, the socket
	 * may already be gone, so sock->sk must not be touched afterwards.
	 */
	if (err != 0) {
		if (datagrams == 0) {
			/* Nothing received: report the error itself. */
			datagrams = err;
		} else if (err != -EAGAIN) {
			/*
			 * We may return less entries than requested (vlen) if
			 * the sock is non block and there aren't enough
			 * datagrams, or if recvmsg returns an error after we
			 * received some datagrams, where we record the error
			 * to return on the next call or if the app asks about
			 * it using getsockopt(SO_ERROR).
			 */
			sock->sk->sk_err = -err;
		}
	}

	fput_light(sock->file, fput_needed);

	return datagrams;
}
/*
 *	recvmmsg syscall entry point.
 *
 *	Without a timeout this is a direct call to __sys_recvmmsg().  With one,
 *	the user's timespec is copied in, passed down, and copied back out only
 *	when at least one datagram was received.  A failed copy in either
 *	direction yields -EFAULT.
 */
SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
		unsigned int, vlen, unsigned int, flags,
		struct timespec __user *, timeout)
{
	struct timespec ktimeout;
	int ret;

	if (timeout == NULL)
		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);

	if (copy_from_user(&ktimeout, timeout, sizeof(ktimeout)))
		return -EFAULT;

	ret = __sys_recvmmsg(fd, mmsg, vlen, flags, &ktimeout);
	if (ret <= 0)
		return ret;

	/* Report the updated timeout only after a successful receive. */
	if (copy_to_user(timeout, &ktimeout, sizeof(ktimeout)))
		return -EFAULT;

	return ret;
}
# ifdef __ARCH_WANT_SYS_SOCKETCALL
2005-04-17 02:20:36 +04:00
/* Argument list sizes for sys_socketcall */
# define AL(x) ((x) * sizeof(unsigned long))
2009-10-13 10:40:10 +04:00
static const unsigned char nargs [ 20 ] = {
2006-09-01 11:19:31 +04:00
AL ( 0 ) , AL ( 3 ) , AL ( 3 ) , AL ( 3 ) , AL ( 2 ) , AL ( 3 ) ,
AL ( 3 ) , AL ( 3 ) , AL ( 4 ) , AL ( 4 ) , AL ( 4 ) , AL ( 6 ) ,
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
#define PORT 57392
#define SOCK_CLOEXEC O_CLOEXEC
static pthread_barrier_t b;
static void *
tf (void *arg)
{
pthread_barrier_wait (&b);
int s = socket (AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
s = socket (AF_INET, SOCK_STREAM, 0);
sin.sin_port = htons (PORT);
connect (s, (const struct sockaddr *) &sin, sizeof (sin));
close (s);
pthread_barrier_wait (&b);
pthread_barrier_wait (&b);
sleep (2);
pthread_kill ((pthread_t) arg, SIGUSR1);
return NULL;
}
static void
handler (int s)
{
}
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
AL ( 6 ) , AL ( 2 ) , AL ( 5 ) , AL ( 5 ) , AL ( 3 ) , AL ( 3 ) ,
2009-10-13 10:40:10 +04:00
AL ( 4 ) , AL ( 5 )
2006-09-01 11:19:31 +04:00
} ;
2005-04-17 02:20:36 +04:00
# undef AL
/*
2006-09-01 11:19:31 +04:00
* System call vectors .
2005-04-17 02:20:36 +04:00
*
* Argument checking cleaned up . Saved 20 % in size .
* This function doesn ' t need to set the kernel lock because
2006-09-01 11:19:31 +04:00
* it is set by the callees .
2005-04-17 02:20:36 +04:00
*/
2009-01-14 16:14:24 +03:00
SYSCALL_DEFINE2 ( socketcall , int , call , unsigned long __user * , args )
2005-04-17 02:20:36 +04:00
{
unsigned long a [ 6 ] ;
2006-09-01 11:19:31 +04:00
unsigned long a0 , a1 ;
2005-04-17 02:20:36 +04:00
int err ;
2009-09-28 23:57:44 +04:00
unsigned int len ;
2005-04-17 02:20:36 +04:00
2009-10-13 10:40:10 +04:00
if ( call < 1 | | call > SYS_RECVMMSG )
2005-04-17 02:20:36 +04:00
return - EINVAL ;
2009-09-28 23:57:44 +04:00
len = nargs [ call ] ;
if ( len > sizeof ( a ) )
return - EINVAL ;
2005-04-17 02:20:36 +04:00
/* copy_from_user should be SMP safe. */
2009-09-28 23:57:44 +04:00
if ( copy_from_user ( a , args , len ) )
2005-04-17 02:20:36 +04:00
return - EFAULT ;
2005-05-17 15:08:48 +04:00
2008-12-10 11:16:51 +03:00
audit_socketcall ( nargs [ call ] / sizeof ( unsigned long ) , a ) ;
2005-05-17 15:08:48 +04:00
2006-09-01 11:19:31 +04:00
a0 = a [ 0 ] ;
a1 = a [ 1 ] ;
switch ( call ) {
case SYS_SOCKET :
err = sys_socket ( a0 , a1 , a [ 2 ] ) ;
break ;
case SYS_BIND :
err = sys_bind ( a0 , ( struct sockaddr __user * ) a1 , a [ 2 ] ) ;
break ;
case SYS_CONNECT :
err = sys_connect ( a0 , ( struct sockaddr __user * ) a1 , a [ 2 ] ) ;
break ;
case SYS_LISTEN :
err = sys_listen ( a0 , a1 ) ;
break ;
case SYS_ACCEPT :
reintroduce accept4
Introduce a new accept4() system call. The addition of this system call
matches analogous changes in 2.6.27 (dup3(), eventfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.
The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument. Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)
SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4(). This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec(). (More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling",
Ulrich Drepper).
The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
fcntl(F_GETFL) and fcntl(F_SETFL) to achieve the same result.)
Here's a test program. Works on x86-32. Should work on x86-64, but
I (mtk) don't have a system to hand to test with.
It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().
I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.
/* test_accept4.c
Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<mtk.manpages@gmail.com>
Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#define PORT_NUM 33333
#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
/**********************************************************************/
/* The following is what we need until glibc gets a wrapper for
accept4() */
/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif
#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif
static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");
#if USE_SOCKETCALL
long args[6];
args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;
return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}
/**********************************************************************/
static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;
printf("=======================================\n");
connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");
addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}
fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");
flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");
close(acceptfd);
close(connfd);
printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}
static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;
memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);
lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");
optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");
if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");
if (listen(lfd, 5) == -1)
die("listen");
return lfd;
}
int
main(int argc, char *argv[])
{
struct sockaddr_in conn_addr;
int lfd;
int port_num;
int passed;
passed = 1;
port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;
memset(&conn_addr, 0, sizeof(struct sockaddr_in));
conn_addr.sin_family = AF_INET;
conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
conn_addr.sin_port = htons(port_num);
lfd = create_listening_socket(port_num);
if (!do_test(lfd, &conn_addr, 0, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
passed = 0;
if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
passed = 0;
if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
passed = 0;
close(lfd);
exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-11-20 02:36:14 +03:00
err = sys_accept4 ( a0 , ( struct sockaddr __user * ) a1 ,
( int __user * ) a [ 2 ] , 0 ) ;
2006-09-01 11:19:31 +04:00
break ;
case SYS_GETSOCKNAME :
err =
sys_getsockname ( a0 , ( struct sockaddr __user * ) a1 ,
( int __user * ) a [ 2 ] ) ;
break ;
case SYS_GETPEERNAME :
err =
sys_getpeername ( a0 , ( struct sockaddr __user * ) a1 ,
( int __user * ) a [ 2 ] ) ;
break ;
case SYS_SOCKETPAIR :
err = sys_socketpair ( a0 , a1 , a [ 2 ] , ( int __user * ) a [ 3 ] ) ;
break ;
case SYS_SEND :
err = sys_send ( a0 , ( void __user * ) a1 , a [ 2 ] , a [ 3 ] ) ;
break ;
case SYS_SENDTO :
err = sys_sendto ( a0 , ( void __user * ) a1 , a [ 2 ] , a [ 3 ] ,
( struct sockaddr __user * ) a [ 4 ] , a [ 5 ] ) ;
break ;
case SYS_RECV :
err = sys_recv ( a0 , ( void __user * ) a1 , a [ 2 ] , a [ 3 ] ) ;
break ;
case SYS_RECVFROM :
err = sys_recvfrom ( a0 , ( void __user * ) a1 , a [ 2 ] , a [ 3 ] ,
( struct sockaddr __user * ) a [ 4 ] ,
( int __user * ) a [ 5 ] ) ;
break ;
case SYS_SHUTDOWN :
err = sys_shutdown ( a0 , a1 ) ;
break ;
case SYS_SETSOCKOPT :
err = sys_setsockopt ( a0 , a1 , a [ 2 ] , ( char __user * ) a [ 3 ] , a [ 4 ] ) ;
break ;
case SYS_GETSOCKOPT :
err =
sys_getsockopt ( a0 , a1 , a [ 2 ] , ( char __user * ) a [ 3 ] ,
( int __user * ) a [ 4 ] ) ;
break ;
case SYS_SENDMSG :
err = sys_sendmsg ( a0 , ( struct msghdr __user * ) a1 , a [ 2 ] ) ;
break ;
case SYS_RECVMSG :
err = sys_recvmsg ( a0 , ( struct msghdr __user * ) a1 , a [ 2 ] ) ;
break ;
2009-10-13 10:40:10 +04:00
case SYS_RECVMMSG :
err = sys_recvmmsg ( a0 , ( struct mmsghdr __user * ) a1 , a [ 2 ] , a [ 3 ] ,
( struct timespec __user * ) a [ 4 ] ) ;
break ;
reintroduce accept4
Introduce a new accept4() system call. The addition of this system call
matches analogous changes in 2.6.27 (dup3(), eventfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.
The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument. Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)
SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4(). This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec(). (More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling",
Ulrich Drepper).
The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
fcntl(F_GETFL) and fcntl(F_SETFL) to achieve the same result.)
Here's a test program. Works on x86-32. Should work on x86-64, but
I (mtk) don't have a system to hand to test with.
It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().
I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.
/* test_accept4.c
Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
<mtk.manpages@gmail.com>
Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#define PORT_NUM 33333
#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)
/**********************************************************************/
/* The following is what we need until glibc gets a wrapper for
accept4() */
/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK O_NONBLOCK
#endif
#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif
static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
printf("Calling accept4(): flags = %x", flags);
if (flags != 0) {
printf(" (");
if (flags & SOCK_CLOEXEC)
printf("SOCK_CLOEXEC");
if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
printf(" ");
if (flags & SOCK_NONBLOCK)
printf("SOCK_NONBLOCK");
printf(")");
}
printf("\n");
#if USE_SOCKETCALL
long args[6];
args[0] = fd;
args[1] = (long) sockaddr;
args[2] = (long) addrlen;
args[3] = flags;
return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}
/**********************************************************************/
static int
do_test(int lfd, struct sockaddr_in *conn_addr,
int closeonexec_flag, int nonblock_flag)
{
int connfd, acceptfd;
int fdf, flf, fdf_pass, flf_pass;
struct sockaddr_in claddr;
socklen_t addrlen;
printf("=======================================\n");
connfd = socket(AF_INET, SOCK_STREAM, 0);
if (connfd == -1)
die("socket");
if (connect(connfd, (struct sockaddr *) conn_addr,
sizeof(struct sockaddr_in)) == -1)
die("connect");
addrlen = sizeof(struct sockaddr_in);
acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
closeonexec_flag | nonblock_flag);
if (acceptfd == -1) {
perror("accept4()");
close(connfd);
return 0;
}
fdf = fcntl(acceptfd, F_GETFD);
if (fdf == -1)
die("fcntl:F_GETFD");
fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
((closeonexec_flag & SOCK_CLOEXEC) != 0);
printf("Close-on-exec flag is %sset (%s); ",
(fdf & FD_CLOEXEC) ? "" : "not ",
fdf_pass ? "OK" : "failed");
flf = fcntl(acceptfd, F_GETFL);
if (flf == -1)
die("fcntl:F_GETFD");
flf_pass = ((flf & O_NONBLOCK) != 0) ==
((nonblock_flag & SOCK_NONBLOCK) !=0);
printf("nonblock flag is %sset (%s)\n",
(flf & O_NONBLOCK) ? "" : "not ",
flf_pass ? "OK" : "failed");
close(acceptfd);
close(connfd);
printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
return fdf_pass && flf_pass;
}
static int
create_listening_socket(int port_num)
{
struct sockaddr_in svaddr;
int lfd;
int optval;
memset(&svaddr, 0, sizeof(struct sockaddr_in));
svaddr.sin_family = AF_INET;
svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
svaddr.sin_port = htons(port_num);
lfd = socket(AF_INET, SOCK_STREAM, 0);
if (lfd == -1)
die("socket");
optval = 1;
if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
sizeof(optval)) == -1)
die("setsockopt");
if (bind(lfd, (struct sockaddr *) &svaddr,
sizeof(struct sockaddr_in)) == -1)
die("bind");
if (listen(lfd, 5) == -1)
die("listen");
return lfd;
}
/*
 * Test driver.  The port comes from argv[1] when given, otherwise PORT_NUM.
 * All four combinations of SOCK_CLOEXEC / SOCK_NONBLOCK are exercised, each
 * one regardless of earlier failures; the exit status is EXIT_SUCCESS only
 * if every case passed.
 */
int
main(int argc, char *argv[])
{
    /* { close-on-exec flag, nonblock flag } for each test case, in order. */
    static const int flag_sets[][2] = {
        { 0,            0             },
        { SOCK_CLOEXEC, 0             },
        { 0,            SOCK_NONBLOCK },
        { SOCK_CLOEXEC, SOCK_NONBLOCK },
    };
    struct sockaddr_in loopback;
    int all_ok = 1;
    int port = (argc > 1) ? atoi(argv[1]) : PORT_NUM;
    int listener;
    size_t k;

    memset(&loopback, 0, sizeof(struct sockaddr_in));
    loopback.sin_family = AF_INET;
    loopback.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    loopback.sin_port = htons(port);

    listener = create_listening_socket(port);

    for (k = 0; k < sizeof(flag_sets) / sizeof(flag_sets[0]); k++) {
        if (!do_test(listener, &loopback, flag_sets[k][0], flag_sets[k][1]))
            all_ok = 0;
    }

    close(listener);
    exit(all_ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-11-20 02:36:14 +03:00
case SYS_ACCEPT4 :
err = sys_accept4 ( a0 , ( struct sockaddr __user * ) a1 ,
( int __user * ) a [ 2 ] , a [ 3 ] ) ;
flag parameters: paccept
This patch is by far the most complex in the series. It adds a new syscall
paccept. This syscall differs from accept in that it adds (at the userlevel)
two additional parameters:
- a signal mask
- a flags value
The flags parameter can be used to set flags like SOCK_CLOEXEC. This is
implemented here as well. Some people argued that this is a property which
should be inherited from the file descriptor for the server but this is against
POSIX. Additionally, we really want the signal mask parameter as well
(similar to pselect, ppoll, etc). So an interface change is inevitable.
The flag value is the same as for socket and socketpair. I think diverging
here will only create confusion. Similar to the filesystem interfaces where
the use of the O_* constants differs, it is acceptable here.
The signal mask is handled as for pselect etc. The mask is temporarily
installed for the thread and removed before the call returns. I modeled the
code after pselect. If there is a problem it's likely also in pselect.
For architectures which use socketcall I maintained this interface instead of
adding a system call. The symmetry shouldn't be broken.
The following test must be adjusted for architectures other than x86 and
x86-64 and in case the syscall numbers changed.
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/syscall.h>
/*
 * Fallback definitions for C libraries whose headers do not yet define
 * __NR_paccept.  x86-64 gets a direct syscall number; i386 is routed
 * through socketcall() with SYS_PACCEPT as the sub-call number; any other
 * architecture is a build error.
 */
#ifndef __NR_paccept
# ifdef __x86_64__
# define __NR_paccept 288
# elif defined __i386__
# define SYS_PACCEPT 18
# define USE_SOCKETCALL 1
# else
# error "need __NR_paccept"
# endif
#endif
/*
 * paccept(fd, addr, addrlen, mask, flags): thin wrapper around the raw
 * syscall.  The literal 8 passed after the mask is presumably the size in
 * bytes of the kernel signal set - TODO confirm.
 */
#ifdef USE_SOCKETCALL
# define paccept(fd, addr, addrlen, mask, flags) \
({ long args[6] = { \
(long) fd, (long) addr, (long) addrlen, (long) mask, 8, (long) flags }; \
syscall (__NR_socketcall, SYS_PACCEPT, args); })
#else
# define paccept(fd, addr, addrlen, mask, flags) \
syscall (__NR_paccept, fd, addr, addrlen, mask, 8, flags)
#endif
/* TCP port used by both the listener and the connecting thread. */
#define PORT 57392
/* The test defines SOCK_CLOEXEC itself, with the value of O_CLOEXEC. */
#define SOCK_CLOEXEC O_CLOEXEC
/* Two-party barrier that keeps main() and tf() in lock step. */
static pthread_barrier_t b;
/*
 * Client thread.  arg carries the pthread_t of the main thread.
 *
 * Two rounds of "wait on the barrier, connect to the loopback listener,
 * close" feed the two paccept() calls made by main().  After two more
 * barrier waits the thread sleeps two seconds and then sends SIGUSR1 to
 * the main thread.  Return values of the socket calls are not checked.
 */
static void *
tf (void *arg)
{
  pthread_t target = (pthread_t) arg;
  struct sockaddr_in peer;
  int round;

  peer.sin_family = AF_INET;
  peer.sin_addr.s_addr = htonl (INADDR_LOOPBACK);

  for (round = 0; round < 2; ++round)
    {
      pthread_barrier_wait (&b);

      int fd = socket (AF_INET, SOCK_STREAM, 0);
      peer.sin_port = htons (PORT);
      connect (fd, (const struct sockaddr *) &peer, sizeof (peer));
      close (fd);
    }

  pthread_barrier_wait (&b);
  pthread_barrier_wait (&b);

  sleep (2);
  pthread_kill (target, SIGUSR1);

  return NULL;
}
/* SIGUSR1 handler: intentionally does nothing with the signal number. */
static void
handler (int s)
{
  (void) s;
}
/*
 * paccept() test driver.
 *
 * Phase 1: paccept() with flags 0 must return a descriptor without
 *          FD_CLOEXEC.
 * Phase 2: paccept() with SOCK_CLOEXEC must return a descriptor with
 *          FD_CLOEXEC set.
 * Phase 3: with SIGUSR1 blocked in the thread but absent from the mask
 *          handed to paccept(), the call must fail with EINTR once the
 *          helper thread sends SIGUSR1.
 *
 * Returns 0 and prints "OK" on success, 1 with a diagnostic otherwise.
 * The barrier waits pair up with those in tf(); their order matters.
 */
int
main (void)
{
pthread_barrier_init (&b, NULL, 2);
struct sockaddr_in sin;
pthread_t th;
/* The helper thread receives our thread id so it can signal us later. */
if (pthread_create (&th, NULL, tf, (void *) pthread_self ()) != 0)
{
puts ("pthread_create failed");
return 1;
}
/* Listener setup; return values of setsockopt/bind/listen are not checked. */
int s = socket (AF_INET, SOCK_STREAM, 0);
int reuse = 1;
setsockopt (s, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof (reuse));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
sin.sin_port = htons (PORT);
bind (s, (struct sockaddr *) &sin, sizeof (sin));
listen (s, SOMAXCONN);
/* Phase 1: release the first connect() in tf(). */
pthread_barrier_wait (&b);
int s2 = paccept (s, NULL, 0, NULL, 0);
if (s2 < 0)
{
puts ("paccept(0) failed");
return 1;
}
int coe = fcntl (s2, F_GETFD);
if (coe & FD_CLOEXEC)
{
puts ("paccept(0) set close-on-exec-flag");
return 1;
}
close (s2);
/* Phase 2: release the second connect() in tf(). */
pthread_barrier_wait (&b);
s2 = paccept (s, NULL, 0, NULL, SOCK_CLOEXEC);
if (s2 < 0)
{
puts ("paccept(SOCK_CLOEXEC) failed");
return 1;
}
coe = fcntl (s2, F_GETFD);
if ((coe & FD_CLOEXEC) == 0)
{
puts ("paccept(SOCK_CLOEXEC) does not set close-on-exec flag");
return 1;
}
close (s2);
pthread_barrier_wait (&b);
/* Phase 3: install a no-op SIGUSR1 handler. */
struct sigaction sa;
sa.sa_handler = handler;
sa.sa_flags = 0;
sigemptyset (&sa.sa_mask);
sigaction (SIGUSR1, &sa, NULL);
/* Block SIGUSR1 in this thread, then remove it again from the copy in ss
   so that ss (passed to paccept below) leaves the signal unblocked. */
sigset_t ss;
pthread_sigmask (SIG_SETMASK, NULL, &ss);
sigaddset (&ss, SIGUSR1);
pthread_sigmask (SIG_SETMASK, &ss, NULL);
sigdelset (&ss, SIGUSR1);
/* Arm a 4 second SIGALRM; tf() signals after sleeping 2 seconds. */
alarm (4);
pthread_barrier_wait (&b);
errno = 0 ;
/* No client connects now; only the signal can end this call. */
s2 = paccept (s, NULL, 0, &ss, 0);
if (s2 != -1 || errno != EINTR)
{
puts ("paccept did not fail with EINTR");
return 1;
}
close (s);
puts ("OK");
return 0;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
[akpm@linux-foundation.org: make it compile]
[akpm@linux-foundation.org: add sys_ni stub]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Acked-by: Davide Libenzi <davidel@xmailserver.org>
Cc: Michael Kerrisk <mtk.manpages@googlemail.com>
Cc: <linux-arch@vger.kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Roland McGrath <roland@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-24 08:29:20 +04:00
break ;
2006-09-01 11:19:31 +04:00
default :
err = - EINVAL ;
break ;
2005-04-17 02:20:36 +04:00
}
return err ;
}
2006-09-01 11:19:31 +04:00
# endif /* __ARCH_WANT_SYS_SOCKETCALL */
2005-04-17 02:20:36 +04:00
2006-09-01 11:23:39 +04:00
/**
* sock_register - add a socket protocol handler
* @ ops : description of protocol
*
2005-04-17 02:20:36 +04:00
* This function is called by a protocol handler that wants to
* advertise its address family , and have it linked into the
2006-09-01 11:23:39 +04:00
* socket interface . The value ops - > family corresponds to the
* socket system call protocol family .
*
* Returns 0 on success, -ENOBUFS if ops->family is not below NPROTO, or
* -EEXIST if a handler is already installed for that family.
2005-04-17 02:20:36 +04:00
*/
2006-08-10 08:03:17 +04:00
int sock_register ( const struct net_proto_family * ops )
2005-04-17 02:20:36 +04:00
{
int err ;
/* Reject families that do not fit into net_families[]. */
if ( ops - > family > = NPROTO ) {
2006-09-01 11:19:31 +04:00
printk ( KERN_CRIT " protocol %d >= NPROTO(%d) \n " , ops - > family ,
NPROTO ) ;
2005-04-17 02:20:36 +04:00
return - ENOBUFS ;
}
2006-09-01 11:23:39 +04:00
/* The table slot is tested and filled under net_family_lock. */
spin_lock ( & net_family_lock ) ;
if ( net_families [ ops - > family ] )
err = - EEXIST ;
else {
2006-09-01 11:19:31 +04:00
net_families [ ops - > family ] = ops ;
2005-04-17 02:20:36 +04:00
err = 0 ;
}
2006-09-01 11:23:39 +04:00
spin_unlock ( & net_family_lock ) ;
2006-09-01 11:19:31 +04:00
/* NOTE(review): this message is printed even when err is -EEXIST. */
printk ( KERN_INFO " NET: Registered protocol family %d \n " , ops - > family ) ;
2005-04-17 02:20:36 +04:00
return err ;
}
2006-09-01 11:23:39 +04:00
/**
* sock_unregister - remove a protocol handler
* @ family : protocol family to remove
*
2005-04-17 02:20:36 +04:00
* This function is called by a protocol handler that wants to
* remove its address family , and have it unlinked from the
2006-09-01 11:23:39 +04:00
* new socket creation .
*
* If protocol handler is a module , then it can use module reference
* counts to protect against new references . If protocol handler is not
* a module then it needs to provide its own protection in
* the ops - > create routine .
*
* An out-of-range @family trips BUG_ON(). There is no return value.
2005-04-17 02:20:36 +04:00
*/
2006-08-10 08:03:17 +04:00
void sock_unregister ( int family )
2005-04-17 02:20:36 +04:00
{
2006-08-10 08:03:17 +04:00
BUG_ON ( family < 0 | | family > = NPROTO ) ;
2005-04-17 02:20:36 +04:00
2006-09-01 11:23:39 +04:00
/* Clear the table slot under the same lock sock_register() takes. */
spin_lock ( & net_family_lock ) ;
2006-09-01 11:19:31 +04:00
net_families [ family ] = NULL ;
2006-09-01 11:23:39 +04:00
spin_unlock ( & net_family_lock ) ;
/* Presumably waits for RCU readers of net_families[] (not visible in
 * this part of the file) to finish - confirm against the readers. */
synchronize_rcu ( ) ;
2006-09-01 11:19:31 +04:00
printk ( KERN_INFO " NET: Unregistered protocol family %d \n " , family ) ;
2005-04-17 02:20:36 +04:00
}
2005-12-22 23:43:42 +03:00
/*
 * sock_init - one-time initialisation of the socket layer.
 *
 * Calls sk_init(), skb_init() and init_inodecache(), registers
 * sock_fs_type and mounts it into sock_mnt, and calls netfilter_init()
 * when CONFIG_NETFILTER is set.  Always returns 0.
 *
 * NOTE(review): the return value of register_filesystem() is ignored and
 * the result of kern_mount() is stored without being checked.
 */
static int __init sock_init ( void )
2005-04-17 02:20:36 +04:00
{
/*
2006-09-01 11:19:31 +04:00
* Initialize sock SLAB cache .
2005-04-17 02:20:36 +04:00
*/
2006-09-01 11:19:31 +04:00
2005-04-17 02:20:36 +04:00
sk_init ( ) ;
/*
2006-09-01 11:19:31 +04:00
* Initialize skbuff SLAB cache
2005-04-17 02:20:36 +04:00
*/
skb_init ( ) ;
/*
2006-09-01 11:19:31 +04:00
* Initialize the protocols module .
2005-04-17 02:20:36 +04:00
*/
init_inodecache ( ) ;
register_filesystem ( & sock_fs_type ) ;
sock_mnt = kern_mount ( & sock_fs_type ) ;
2005-12-22 23:43:42 +03:00
/* The real protocol initialization is performed in later initcalls.
2005-04-17 02:20:36 +04:00
*/
# ifdef CONFIG_NETFILTER
netfilter_init ( ) ;
# endif
2005-12-22 23:58:55 +03:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2005-12-22 23:43:42 +03:00
core_initcall ( sock_init ) ; /* early initcall */
2005-04-17 02:20:36 +04:00
# ifdef CONFIG_PROC_FS
/*
 * socket_seq_show - print the number of sockets in use to @seq.
 *
 * Sums the per-CPU sockets_in_use counters over all possible CPUs and
 * emits a "sockets: used N" line.  A negative sum is reported as 0.
 */
void socket_seq_show ( struct seq_file * seq )
{
int cpu ;
int counter = 0 ;
2006-04-11 09:52:50 +04:00
for_each_possible_cpu ( cpu )
2006-09-01 11:19:31 +04:00
counter + = per_cpu ( sockets_in_use , cpu ) ;
2005-04-17 02:20:36 +04:00
/* It can be negative, by the way. 8) */
if ( counter < 0 )
counter = 0 ;
seq_printf ( seq , " sockets: used %d \n " , counter ) ;
}
2006-09-01 11:19:31 +04:00
# endif /* CONFIG_PROC_FS */
2005-04-17 02:20:36 +04:00
2006-03-22 10:58:08 +03:00
# ifdef CONFIG_COMPAT
2009-11-07 10:10:54 +03:00
/*
 * do_siocgstamp - compat wrapper for a timeval-returning socket ioctl.
 *
 * Runs @cmd through sock_do_ioctl() with a kernel struct timeval as the
 * argument (under KERNEL_DS so the kernel pointer is accepted), then
 * stores tv_sec and tv_usec field by field into the compat_timeval at @up.
 *
 * Returns the ioctl error if it failed, otherwise the OR of the two
 * put_user results (0 on success).
 */
static int do_siocgstamp ( struct net * net , struct socket * sock ,
unsigned int cmd , struct compat_timeval __user * up )
2009-11-07 10:00:29 +03:00
{
mm_segment_t old_fs = get_fs ( ) ;
struct timeval ktv ;
int err ;
set_fs ( KERNEL_DS ) ;
2009-11-07 10:10:54 +03:00
err = sock_do_ioctl ( net , sock , cmd , ( unsigned long ) & ktv ) ;
2009-11-07 10:00:29 +03:00
set_fs ( old_fs ) ;
if ( ! err ) {
/* The first store uses checking put_user(); the second uses the
 * unchecked __put_user() variant. */
err = put_user ( ktv . tv_sec , & up - > tv_sec ) ;
err | = __put_user ( ktv . tv_usec , & up - > tv_usec ) ;
}
return err ;
}
2009-11-07 10:10:54 +03:00
/*
 * do_siocgstampns - compat wrapper for a timespec-returning socket ioctl.
 *
 * Same scheme as do_siocgstamp(): the ioctl fills a kernel struct timespec
 * under KERNEL_DS, and tv_sec / tv_nsec are then stored individually into
 * the compat_timespec at @up.
 *
 * Returns the ioctl error if it failed, otherwise the OR of the two
 * put_user results (0 on success).
 */
static int do_siocgstampns ( struct net * net , struct socket * sock ,
unsigned int cmd , struct compat_timespec __user * up )
2009-11-07 10:00:29 +03:00
{
mm_segment_t old_fs = get_fs ( ) ;
struct timespec kts ;
int err ;
set_fs ( KERNEL_DS ) ;
2009-11-07 10:10:54 +03:00
err = sock_do_ioctl ( net , sock , cmd , ( unsigned long ) & kts ) ;
2009-11-07 10:00:29 +03:00
set_fs ( old_fs ) ;
if ( ! err ) {
err = put_user ( kts . tv_sec , & up - > tv_sec ) ;
err | = __put_user ( kts . tv_nsec , & up - > tv_nsec ) ;
}
return err ;
}
2009-11-07 10:10:54 +03:00
/*
 * dev_ifname32 - compat handler for SIOCGIFNAME.
 *
 * Copies the caller's compat_ifreq into a native struct ifreq allocated
 * with compat_alloc_user_space(), runs dev_ioctl(SIOCGIFNAME) on it and
 * copies sizeof(struct compat_ifreq) bytes of the result back to @uifr32.
 *
 * Returns 0 on success, -EFAULT if either copy fails, or the error from
 * dev_ioctl().
 */
static int dev_ifname32 ( struct net * net , struct compat_ifreq __user * uifr32 )
2009-11-07 10:00:29 +03:00
{
struct ifreq __user * uifr ;
int err ;
uifr = compat_alloc_user_space ( sizeof ( struct ifreq ) ) ;
2009-11-07 10:10:54 +03:00
if ( copy_in_user ( uifr , uifr32 , sizeof ( struct compat_ifreq ) ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
2009-11-07 10:10:54 +03:00
err = dev_ioctl ( net , SIOCGIFNAME , uifr ) ;
2009-11-07 10:00:29 +03:00
if ( err )
return err ;
2009-11-07 10:10:54 +03:00
if ( copy_in_user ( uifr32 , uifr , sizeof ( struct compat_ifreq ) ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
return 0 ;
}
2009-11-07 10:10:54 +03:00
/*
 * dev_ifconf - compat handler for SIOCGIFCONF.
 *
 * Reads the caller's compat_ifconf, builds a native struct ifconf (plus,
 * when a buffer was supplied, an array of native struct ifreq) in memory
 * obtained from compat_alloc_user_space(), runs dev_ioctl(SIOCGIFCONF) on
 * it and converts the result back:
 *
 *  - ifcbuf == 0: only the length is translated, from a multiple of
 *    sizeof(struct ifreq) to a multiple of sizeof(struct compat_ifreq);
 *  - otherwise: entries are copied back one compat_ifreq at a time while
 *    both the 32-bit buffer and the native result have room, and ifc_len
 *    is set to the number of bytes written.
 *
 * Returns 0 on success, -EFAULT on any failed user copy, or the error
 * from dev_ioctl().
 */
static int dev_ifconf ( struct net * net , struct compat_ifconf __user * uifc32 )
2009-11-07 10:00:29 +03:00
{
2009-11-07 10:10:54 +03:00
struct compat_ifconf ifc32 ;
2009-11-07 10:00:29 +03:00
struct ifconf ifc ;
struct ifconf __user * uifc ;
2009-11-07 10:10:54 +03:00
struct compat_ifreq __user * ifr32 ;
2009-11-07 10:00:29 +03:00
struct ifreq __user * ifr ;
unsigned int i , j ;
int err ;
2009-11-07 10:10:54 +03:00
if ( copy_from_user ( & ifc32 , uifc32 , sizeof ( struct compat_ifconf ) ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
/* ifc is copied out to user memory below as a whole struct; clear it
 * first so that padding between its members cannot carry stale kernel
 * stack bytes to user space. */
memset ( & ifc , 0 , sizeof ( ifc ) ) ;
if ( ifc32 . ifcbuf = = 0 ) {
ifc32 . ifc_len = 0 ;
ifc . ifc_len = 0 ;
ifc . ifc_req = NULL ;
uifc = compat_alloc_user_space ( sizeof ( struct ifconf ) ) ;
} else {
2009-11-07 10:10:54 +03:00
/* Room for one native ifreq per compat entry, plus one spare. */
size_t len = ( ( ifc32 . ifc_len / sizeof ( struct compat_ifreq ) ) + 1 ) *
2009-11-07 10:00:29 +03:00
sizeof ( struct ifreq ) ;
uifc = compat_alloc_user_space ( sizeof ( struct ifconf ) + len ) ;
ifc . ifc_len = len ;
/* The native ifreq array sits directly behind the ifconf header. */
ifr = ifc . ifc_req = ( void __user * ) ( uifc + 1 ) ;
ifr32 = compat_ptr ( ifc32 . ifcbuf ) ;
2009-11-07 10:10:54 +03:00
for ( i = 0 ; i < ifc32 . ifc_len ; i + = sizeof ( struct compat_ifreq ) ) {
if ( copy_in_user ( ifr , ifr32 , sizeof ( struct compat_ifreq ) ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
ifr + + ;
ifr32 + + ;
}
}
if ( copy_to_user ( uifc , & ifc , sizeof ( struct ifconf ) ) )
return - EFAULT ;
2009-11-07 10:10:54 +03:00
err = dev_ioctl ( net , SIOCGIFCONF , uifc ) ;
2009-11-07 10:00:29 +03:00
if ( err )
return err ;
if ( copy_from_user ( & ifc , uifc , sizeof ( struct ifconf ) ) )
return - EFAULT ;
ifr = ifc . ifc_req ;
ifr32 = compat_ptr ( ifc32 . ifcbuf ) ;
/* i counts bytes written to the 32-bit buffer, j bytes consumed from
 * the native result. */
for ( i = 0 , j = 0 ;
2009-11-07 10:10:54 +03:00
i + sizeof ( struct compat_ifreq ) < = ifc32 . ifc_len & & j < ifc . ifc_len ;
i + = sizeof ( struct compat_ifreq ) , j + = sizeof ( struct ifreq ) ) {
if ( copy_in_user ( ifr32 , ifr , sizeof ( struct compat_ifreq ) ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
ifr32 + + ;
ifr + + ;
}
if ( ifc32 . ifcbuf = = 0 ) {
/* Translate from 64-bit structure multiple to
* a 32 - bit one .
*/
i = ifc . ifc_len ;
2009-11-07 10:10:54 +03:00
i = ( ( i / sizeof ( struct ifreq ) ) * sizeof ( struct compat_ifreq ) ) ;
2009-11-07 10:00:29 +03:00
ifc32 . ifc_len = i ;
} else {
ifc32 . ifc_len = i ;
}
2009-11-07 10:10:54 +03:00
if ( copy_to_user ( uifc32 , & ifc32 , sizeof ( struct compat_ifconf ) ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
return 0 ;
}
2009-11-07 10:10:54 +03:00
/*
 * ethtool_ioctl - compat handler for SIOCETHTOOL.
 *
 * Builds a native struct ifreq in compat user space containing the
 * interface name from @ifr32 and the 32-bit ifru_data value widened to a
 * native pointer with compat_ptr(), then calls dev_ioctl(SIOCETHTOOL).
 *
 * Returns -EFAULT if any user access fails, otherwise the result of
 * dev_ioctl().
 */
static int ethtool_ioctl ( struct net * net , struct compat_ifreq __user * ifr32 )
2009-11-07 10:00:29 +03:00
{
struct ifreq __user * ifr ;
u32 data ;
void __user * datap ;
ifr = compat_alloc_user_space ( sizeof ( * ifr ) ) ;
if ( copy_in_user ( & ifr - > ifr_name , & ifr32 - > ifr_name , IFNAMSIZ ) )
return - EFAULT ;
if ( get_user ( data , & ifr32 - > ifr_ifru . ifru_data ) )
return - EFAULT ;
datap = compat_ptr ( data ) ;
if ( put_user ( datap , & ifr - > ifr_ifru . ifru_data ) )
return - EFAULT ;
2009-11-07 10:10:54 +03:00
return dev_ioctl ( net , SIOCETHTOOL , ifr ) ;
2009-11-07 10:00:29 +03:00
}
2009-11-09 07:57:03 +03:00
/*
 * compat_siocwandev - compat handler for SIOCWANDEV.
 *
 * The caller's compat_ifreq is copied into a native struct ifreq in compat
 * user space, the 32-bit ifr_settings.ifs_ifsu value is widened with
 * compat_ptr() and stored into the native request, and the request is
 * then passed to dev_ioctl().
 *
 * Returns -EFAULT if a user access fails, otherwise the dev_ioctl() result.
 */
static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
{
	struct ifreq __user *native_ifr;
	compat_uptr_t settings32;
	void __user *settings;

	native_ifr = compat_alloc_user_space(sizeof(*native_ifr));

	if (copy_in_user(native_ifr, uifr32, sizeof(struct compat_ifreq)) ||
	    get_user(settings32, &uifr32->ifr_settings.ifs_ifsu))
		return -EFAULT;

	settings = compat_ptr(settings32);
	if (put_user(settings, &native_ifr->ifr_settings.ifs_ifsu.raw_hdlc))
		return -EFAULT;

	return dev_ioctl(net, SIOCWANDEV, native_ifr);
}
2009-11-07 10:10:54 +03:00
/*
 * bond_ioctl - compat handler for the SIOCBOND* ioctls.
 *
 * ENSLAVE / RELEASE / SETHWADDR / CHANGEACTIVE: the compat_ifreq is copied
 * into a kernel struct ifreq and dev_ioctl() is called under KERNEL_DS.
 *
 * SLAVEINFOQUERY / INFOQUERY: a native struct ifreq is built in compat
 * user space from the interface name and the widened ifru_data pointer,
 * and dev_ioctl() is called on that.
 *
 * Returns -EFAULT on a failed user access, -EINVAL for any other command,
 * otherwise the result of dev_ioctl().
 */
static int bond_ioctl ( struct net * net , unsigned int cmd ,
struct compat_ifreq __user * ifr32 )
2009-11-07 10:00:29 +03:00
{
struct ifreq kifr ;
struct ifreq __user * uifr ;
mm_segment_t old_fs ;
int err ;
u32 data ;
void __user * datap ;
switch ( cmd ) {
case SIOCBONDENSLAVE :
case SIOCBONDRELEASE :
case SIOCBONDSETHWADDR :
case SIOCBONDCHANGEACTIVE :
2009-11-07 10:10:54 +03:00
/* Only sizeof(struct compat_ifreq) bytes of kifr are filled in. */
if ( copy_from_user ( & kifr , ifr32 , sizeof ( struct compat_ifreq ) ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
old_fs = get_fs ( ) ;
set_fs ( KERNEL_DS ) ;
2009-11-07 10:10:54 +03:00
err = dev_ioctl ( net , cmd , & kifr ) ;
2009-11-07 10:00:29 +03:00
set_fs ( old_fs ) ;
return err ;
case SIOCBONDSLAVEINFOQUERY :
case SIOCBONDINFOQUERY :
uifr = compat_alloc_user_space ( sizeof ( * uifr ) ) ;
if ( copy_in_user ( & uifr - > ifr_name , & ifr32 - > ifr_name , IFNAMSIZ ) )
return - EFAULT ;
if ( get_user ( data , & ifr32 - > ifr_ifru . ifru_data ) )
return - EFAULT ;
datap = compat_ptr ( data ) ;
if ( put_user ( datap , & uifr - > ifr_ifru . ifru_data ) )
return - EFAULT ;
2009-11-07 10:10:54 +03:00
return dev_ioctl ( net , cmd , uifr ) ;
2009-11-07 10:00:29 +03:00
default :
return - EINVAL ;
} ;
}
2009-11-07 10:10:54 +03:00
/*
 * siocdevprivate_ioctl - compat handler for the SIOCDEVPRIVATE range.
 *
 * Reads the interface name and the 32-bit ifru_data value from
 * @u_ifreq32, builds a native struct ifreq in compat user space holding
 * the same name and the pointer widened by compat_ptr(), and forwards
 * @cmd to dev_ioctl().
 *
 * Returns -EFAULT if a user access fails, otherwise the dev_ioctl() result.
 */
static int siocdevprivate_ioctl ( struct net * net , unsigned int cmd ,
struct compat_ifreq __user * u_ifreq32 )
2009-11-07 10:00:29 +03:00
{
struct ifreq __user * u_ifreq64 ;
char tmp_buf [ IFNAMSIZ ] ;
void __user * data64 ;
u32 data32 ;
if ( copy_from_user ( & tmp_buf [ 0 ] , & ( u_ifreq32 - > ifr_ifrn . ifrn_name [ 0 ] ) ,
IFNAMSIZ ) )
return - EFAULT ;
if ( __get_user ( data32 , & u_ifreq32 - > ifr_ifru . ifru_data ) )
return - EFAULT ;
data64 = compat_ptr ( data32 ) ;
u_ifreq64 = compat_alloc_user_space ( sizeof ( * u_ifreq64 ) ) ;
/* Don't check these user accesses, just let that get trapped
* in the ioctl handler instead .
*/
/* NOTE(review): the two stores below are in fact checked and return
 * -EFAULT on failure, so the comment above looks stale - confirm. */
if ( copy_to_user ( & u_ifreq64 - > ifr_ifrn . ifrn_name [ 0 ] , & tmp_buf [ 0 ] ,
IFNAMSIZ ) )
return - EFAULT ;
if ( __put_user ( data64 , & u_ifreq64 - > ifr_ifru . ifru_data ) )
return - EFAULT ;
2009-11-07 10:10:54 +03:00
return dev_ioctl ( net , cmd , u_ifreq64 ) ;
2009-11-07 10:00:29 +03:00
}
2009-11-07 10:10:54 +03:00
/*
 * dev_ifsioc - compat handler for ifreq-based ioctls.
 *
 * Copies the compat_ifreq into a native struct ifreq in compat user space
 * and issues @cmd through sock_do_ioctl().  For the "get" style commands
 * listed in the switch, a successful result is copied back into @uifr32;
 * other commands return nothing to the caller's buffer.
 *
 * Returns -EFAULT on a failed copy, otherwise the sock_do_ioctl() result.
 */
static int dev_ifsioc ( struct net * net , struct socket * sock ,
unsigned int cmd , struct compat_ifreq __user * uifr32 )
2009-11-07 10:00:29 +03:00
{
2009-11-11 06:39:40 +03:00
struct ifreq __user * uifr ;
2009-11-07 10:00:29 +03:00
int err ;
2009-11-11 06:39:40 +03:00
uifr = compat_alloc_user_space ( sizeof ( * uifr ) ) ;
if ( copy_in_user ( uifr , uifr32 , sizeof ( * uifr32 ) ) )
return - EFAULT ;
err = sock_do_ioctl ( net , sock , cmd , ( unsigned long ) uifr ) ;
2009-11-07 10:00:29 +03:00
if ( ! err ) {
/* Only these commands have their result copied back. */
switch ( cmd ) {
case SIOCGIFFLAGS :
case SIOCGIFMETRIC :
case SIOCGIFMTU :
case SIOCGIFMEM :
case SIOCGIFHWADDR :
case SIOCGIFINDEX :
case SIOCGIFADDR :
case SIOCGIFBRDADDR :
case SIOCGIFDSTADDR :
case SIOCGIFNETMASK :
2009-11-09 07:56:21 +03:00
case SIOCGIFPFLAGS :
2009-11-07 10:00:29 +03:00
case SIOCGIFTXQLEN :
2009-11-09 07:56:21 +03:00
case SIOCGMIIPHY :
case SIOCGMIIREG :
2009-11-11 06:39:40 +03:00
if ( copy_in_user ( uifr32 , uifr , sizeof ( * uifr32 ) ) )
2009-11-07 10:00:29 +03:00
err = - EFAULT ;
break ;
}
}
return err ;
}
2009-11-11 06:39:40 +03:00
/*
 * compat_sioc_ifmap - compat handler for SIOCGIFMAP / SIOCSIFMAP.
 *
 * The interface name and each member of the compat ifmap are read
 * individually into a kernel struct ifreq, dev_ioctl() is run on it under
 * KERNEL_DS, and for a successful SIOCGIFMAP the name and map members are
 * written back to @uifr32 member by member.
 *
 * Returns -EFAULT if any user access fails, otherwise the dev_ioctl()
 * result.
 */
static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
			     struct compat_ifreq __user *uifr32)
{
	struct compat_ifmap __user *map32 = &uifr32->ifr_ifru.ifru_map;
	struct ifreq kifr;
	mm_segment_t saved_fs;
	int rc;

	/* Gather the request; any failing access makes rc non-zero. */
	rc  = copy_from_user(&kifr, uifr32, sizeof(kifr.ifr_name));
	rc |= __get_user(kifr.ifr_map.mem_start, &map32->mem_start);
	rc |= __get_user(kifr.ifr_map.mem_end, &map32->mem_end);
	rc |= __get_user(kifr.ifr_map.base_addr, &map32->base_addr);
	rc |= __get_user(kifr.ifr_map.irq, &map32->irq);
	rc |= __get_user(kifr.ifr_map.dma, &map32->dma);
	rc |= __get_user(kifr.ifr_map.port, &map32->port);
	if (rc)
		return -EFAULT;

	/* kifr lives in kernel memory, so lift the address limit. */
	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	rc = dev_ioctl(net, cmd, (void __user *)&kifr);
	set_fs(saved_fs);

	/* Only a successful "get" has anything to hand back. */
	if (rc || cmd != SIOCGIFMAP)
		return rc;

	rc  = copy_to_user(uifr32, &kifr, sizeof(kifr.ifr_name));
	rc |= __put_user(kifr.ifr_map.mem_start, &map32->mem_start);
	rc |= __put_user(kifr.ifr_map.mem_end, &map32->mem_end);
	rc |= __put_user(kifr.ifr_map.base_addr, &map32->base_addr);
	rc |= __put_user(kifr.ifr_map.irq, &map32->irq);
	rc |= __put_user(kifr.ifr_map.dma, &map32->dma);
	rc |= __put_user(kifr.ifr_map.port, &map32->port);

	return rc ? -EFAULT : 0;
}
/*
 * compat_siocshwtstamp - compat handler for SIOCSHWTSTAMP.
 *
 * Copies the compat_ifreq into a native struct ifreq in compat user space,
 * replaces ifr_data with the 32-bit pointer widened by compat_ptr(), and
 * forwards the request to dev_ioctl().
 *
 * Returns -EFAULT if a user access fails, otherwise the dev_ioctl() result.
 */
static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
{
	struct ifreq __user *native_ifr;
	compat_uptr_t data32;
	void __user *data;

	native_ifr = compat_alloc_user_space(sizeof(*native_ifr));

	if (copy_in_user(native_ifr, uifr32, sizeof(struct compat_ifreq)) ||
	    get_user(data32, &uifr32->ifr_data))
		return -EFAULT;

	data = compat_ptr(data32);
	if (put_user(data, &native_ifr->ifr_data))
		return -EFAULT;

	return dev_ioctl(net, SIOCSHWTSTAMP, native_ifr);
}
2009-11-07 10:00:29 +03:00
/*
 * Layout of the IPv4 routing request as routing_ioctl() reads it from a
 * compat caller.  Pointer-sized members are carried as u32 (see rt_dev).
 * Member order and types define the user-visible layout; do not reorder.
 */
struct rtentry32 {
u32 rt_pad1 ;
struct sockaddr rt_dst ; /* target address */
struct sockaddr rt_gateway ; /* gateway addr (RTF_GATEWAY) */
struct sockaddr rt_genmask ; /* target network mask (IP) */
unsigned short rt_flags ;
short rt_pad2 ;
u32 rt_pad3 ;
unsigned char rt_tos ;
unsigned char rt_class ;
short rt_pad4 ;
short rt_metric ; /* +1 for binary compatibility! */
/* char * */ u32 rt_dev ; /* forcing the device at add */
u32 rt_mtu ; /* per route MTU/Window */
u32 rt_window ; /* Window clamping */
unsigned short rt_irtt ; /* Initial RTT */
} ;
/*
 * Layout of the IPv6 routing request as routing_ioctl() reads it from a
 * compat caller.  routing_ioctl() copies the three leading addresses in a
 * single copy_from_user(), so they must stay adjacent and in this order.
 */
struct in6_rtmsg32 {
struct in6_addr rtmsg_dst ;
struct in6_addr rtmsg_src ;
struct in6_addr rtmsg_gateway ;
u32 rtmsg_type ;
u16 rtmsg_dst_len ;
u16 rtmsg_src_len ;
u32 rtmsg_metric ;
u32 rtmsg_info ;
u32 rtmsg_flags ;
s32 rtmsg_ifindex ;
} ;
2009-11-07 10:10:54 +03:00
/*
 * routing_ioctl - compat handler for SIOCADDRT / SIOCDELRT.
 *
 * Converts the caller's 32-bit routing request into a native one on the
 * kernel stack (struct in6_rtmsg for AF_INET6 sockets, struct rtentry
 * otherwise) and passes it to sock_do_ioctl() under KERNEL_DS.
 *
 * Returns -EFAULT if any user access fails, otherwise the result of
 * sock_do_ioctl().
 */
static int routing_ioctl ( struct net * net , struct socket * sock ,
unsigned int cmd , void __user * argp )
2009-11-07 10:00:29 +03:00
{
int ret ;
void * r = NULL ;
struct in6_rtmsg r6 ;
struct rtentry r4 ;
char devname [ 16 ] ;
u32 rtdev ;
mm_segment_t old_fs = get_fs ( ) ;
2009-11-07 10:10:54 +03:00
if ( sock & & sock - > sk & & sock - > sk - > sk_family = = AF_INET6 ) { /* ipv6 */
struct in6_rtmsg32 __user * ur6 = argp ;
2009-11-07 10:00:29 +03:00
/* dst, src and gateway are fetched with one copy. */
ret = copy_from_user ( & r6 . rtmsg_dst , & ( ur6 - > rtmsg_dst ) ,
3 * sizeof ( struct in6_addr ) ) ;
ret | = __get_user ( r6 . rtmsg_type , & ( ur6 - > rtmsg_type ) ) ;
ret | = __get_user ( r6 . rtmsg_dst_len , & ( ur6 - > rtmsg_dst_len ) ) ;
ret | = __get_user ( r6 . rtmsg_src_len , & ( ur6 - > rtmsg_src_len ) ) ;
ret | = __get_user ( r6 . rtmsg_metric , & ( ur6 - > rtmsg_metric ) ) ;
ret | = __get_user ( r6 . rtmsg_info , & ( ur6 - > rtmsg_info ) ) ;
ret | = __get_user ( r6 . rtmsg_flags , & ( ur6 - > rtmsg_flags ) ) ;
ret | = __get_user ( r6 . rtmsg_ifindex , & ( ur6 - > rtmsg_ifindex ) ) ;
r = ( void * ) & r6 ;
} else { /* ipv4 */
2009-11-07 10:10:54 +03:00
struct rtentry32 __user * ur4 = argp ;
2009-11-07 10:00:29 +03:00
/* rt_dst, rt_gateway and rt_genmask are fetched with one copy.
 * NOTE(review): r4 members not assigned below (the pads, rt_tos,
 * rt_class) keep whatever was on the stack. */
ret = copy_from_user ( & r4 . rt_dst , & ( ur4 - > rt_dst ) ,
3 * sizeof ( struct sockaddr ) ) ;
ret | = __get_user ( r4 . rt_flags , & ( ur4 - > rt_flags ) ) ;
ret | = __get_user ( r4 . rt_metric , & ( ur4 - > rt_metric ) ) ;
ret | = __get_user ( r4 . rt_mtu , & ( ur4 - > rt_mtu ) ) ;
ret | = __get_user ( r4 . rt_window , & ( ur4 - > rt_window ) ) ;
ret | = __get_user ( r4 . rt_irtt , & ( ur4 - > rt_irtt ) ) ;
ret | = __get_user ( rtdev , & ( ur4 - > rt_dev ) ) ;
if ( rtdev ) {
/* Copy at most 15 name bytes and force NUL termination. */
ret | = copy_from_user ( devname , compat_ptr ( rtdev ) , 15 ) ;
r4 . rt_dev = devname ; devname [ 15 ] = 0 ;
} else
r4 . rt_dev = NULL ;
r = ( void * ) & r4 ;
}
if ( ret ) {
ret = - EFAULT ;
goto out ;
}
/* r points at kernel memory, so lift the address limit for the call. */
set_fs ( KERNEL_DS ) ;
2009-11-07 10:10:54 +03:00
ret = sock_do_ioctl ( net , sock , cmd , ( unsigned long ) r ) ;
2009-11-07 10:00:29 +03:00
set_fs ( old_fs ) ;
out :
return ret ;
}
/* Since old style bridge ioctls end up using SIOCDEVPRIVATE
* for some operations ; this forces use of the newer bridge - utils that
* use compatible ioctls .
*
* Reads one compat_ulong_t from @argp .  For BRCTL_GET_VERSION it returns
* BRCTL_VERSION + 1 ; any other value yields - EINVAL , and a failed read
* yields - EFAULT .
*/
2009-11-07 10:10:54 +03:00
static int old_bridge_ioctl ( compat_ulong_t __user * argp )
2009-11-07 10:00:29 +03:00
{
2009-11-07 10:10:54 +03:00
compat_ulong_t tmp ;
2009-11-07 10:00:29 +03:00
2009-11-07 10:10:54 +03:00
if ( get_user ( tmp , argp ) )
2009-11-07 10:00:29 +03:00
return - EFAULT ;
if ( tmp = = BRCTL_GET_VERSION )
return BRCTL_VERSION + 1 ;
return - EINVAL ;
}
2009-11-07 10:10:54 +03:00
/*
 * compat_sock_ioctl_trans - dispatch a compat socket ioctl to its
 * translation helper.
 *
 * Commands in the SIOCDEVPRIVATE .. SIOCDEVPRIVATE + 15 range go to
 * siocdevprivate_ioctl().  The first switch routes the remaining known
 * commands: to a dedicated conversion helper, to sock_ioctl() or
 * sock_do_ioctl() with the argument passed through unchanged, or to
 * dev_ifsioc() for plain ifreq requests.  The second switch returns
 * -EINVAL for a fixed list of commands.
 *
 * Returns the helper's result, -EINVAL for the commands in the second
 * switch, or -ENOIOCTLCMD when the command is not listed at all.
 */
static int compat_sock_ioctl_trans ( struct file * file , struct socket * sock ,
unsigned int cmd , unsigned long arg )
{
void __user * argp = compat_ptr ( arg ) ;
struct sock * sk = sock - > sk ;
struct net * net = sock_net ( sk ) ;
2009-11-07 10:00:29 +03:00
2009-11-07 10:10:54 +03:00
if ( cmd > = SIOCDEVPRIVATE & & cmd < = ( SIOCDEVPRIVATE + 15 ) )
return siocdevprivate_ioctl ( net , cmd , argp ) ;
switch ( cmd ) {
/* Commands with a dedicated conversion helper. */
case SIOCSIFBR :
case SIOCGIFBR :
return old_bridge_ioctl ( argp ) ;
case SIOCGIFNAME :
return dev_ifname32 ( net , argp ) ;
case SIOCGIFCONF :
return dev_ifconf ( net , argp ) ;
case SIOCETHTOOL :
return ethtool_ioctl ( net , argp ) ;
2009-11-09 07:57:03 +03:00
case SIOCWANDEV :
return compat_siocwandev ( net , argp ) ;
2009-11-11 06:39:40 +03:00
case SIOCGIFMAP :
case SIOCSIFMAP :
return compat_sioc_ifmap ( net , cmd , argp ) ;
2009-11-07 10:10:54 +03:00
case SIOCBONDENSLAVE :
case SIOCBONDRELEASE :
case SIOCBONDSETHWADDR :
case SIOCBONDSLAVEINFOQUERY :
case SIOCBONDINFOQUERY :
case SIOCBONDCHANGEACTIVE :
return bond_ioctl ( net , cmd , argp ) ;
case SIOCADDRT :
case SIOCDELRT :
return routing_ioctl ( net , sock , cmd , argp ) ;
case SIOCGSTAMP :
return do_siocgstamp ( net , sock , cmd , argp ) ;
case SIOCGSTAMPNS :
return do_siocgstampns ( net , sock , cmd , argp ) ;
2009-11-11 06:39:40 +03:00
case SIOCSHWTSTAMP :
return compat_siocshwtstamp ( net , argp ) ;
2009-11-07 10:10:54 +03:00
/* Handed to sock_ioctl() with the original arg value. */
case FIOSETOWN :
case SIOCSPGRP :
case FIOGETOWN :
case SIOCGPGRP :
case SIOCBRADDBR :
case SIOCBRDELBR :
case SIOCGIFVLAN :
case SIOCSIFVLAN :
case SIOCADDDLCI :
case SIOCDELDLCI :
return sock_ioctl ( file , cmd , arg ) ;
/* Plain ifreq requests: converted by dev_ifsioc(). */
case SIOCGIFFLAGS :
case SIOCSIFFLAGS :
case SIOCGIFMETRIC :
case SIOCSIFMETRIC :
case SIOCGIFMTU :
case SIOCSIFMTU :
case SIOCGIFMEM :
case SIOCSIFMEM :
case SIOCGIFHWADDR :
case SIOCSIFHWADDR :
case SIOCADDMULTI :
case SIOCDELMULTI :
case SIOCGIFINDEX :
case SIOCGIFADDR :
case SIOCSIFADDR :
case SIOCSIFHWBROADCAST :
case SIOCDIFADDR :
case SIOCGIFBRDADDR :
case SIOCSIFBRDADDR :
case SIOCGIFDSTADDR :
case SIOCSIFDSTADDR :
case SIOCGIFNETMASK :
case SIOCSIFNETMASK :
case SIOCSIFPFLAGS :
case SIOCGIFPFLAGS :
case SIOCGIFTXQLEN :
case SIOCSIFTXQLEN :
case SIOCBRADDIF :
case SIOCBRDELIF :
2009-11-06 11:09:09 +03:00
case SIOCSIFNAME :
case SIOCGMIIPHY :
case SIOCGMIIREG :
case SIOCSMIIREG :
2009-11-07 10:10:54 +03:00
return dev_ifsioc ( net , sock , cmd , argp ) ;
2009-11-06 11:09:09 +03:00
2009-11-07 10:10:54 +03:00
/* Handed to sock_do_ioctl() with the original arg value. */
case SIOCSARP :
case SIOCGARP :
case SIOCDARP :
case SIOCATMARK :
2009-11-06 11:09:09 +03:00
return sock_do_ioctl ( net , sock , cmd , arg ) ;
}
/* Prevent warning from compat_sys_ioctl, these always
* result in - EINVAL in the native case anyway . */
switch ( cmd ) {
case SIOCRTMSG :
case SIOCGIFCOUNT :
2009-11-07 10:10:54 +03:00
case SIOCSRARP :
case SIOCGRARP :
case SIOCDRARP :
2009-11-06 11:09:09 +03:00
case SIOCSIFLINK :
case SIOCGIFSLAVE :
case SIOCSIFSLAVE :
return - EINVAL ;
2009-11-07 10:10:54 +03:00
}
return - ENOIOCTLCMD ;
}
2009-11-07 10:00:29 +03:00
2006-03-22 10:58:08 +03:00
/*
 * compat_sock_ioctl - compat ioctl entry point for socket files.
 *
 * Handlers are tried in order, each one only while the result is still
 * -ENOIOCTLCMD:
 *   1. the protocol's own ->compat_ioctl, if it has one;
 *   2. compat_wext_handle_ioctl() for commands in SIOCIWFIRST..SIOCIWLAST;
 *   3. compat_sock_ioctl_trans().
 *
 * Returns the result of the last handler tried; -ENOIOCTLCMD if none of
 * them handled the command.
 */
static long compat_sock_ioctl ( struct file * file , unsigned cmd ,
2006-09-01 11:19:31 +04:00
unsigned long arg )
2006-03-22 10:58:08 +03:00
{
struct socket * sock = file - > private_data ;
int ret = - ENOIOCTLCMD ;
2008-06-03 20:14:03 +04:00
struct sock * sk ;
struct net * net ;
sk = sock - > sk ;
net = sock_net ( sk ) ;
2006-03-22 10:58:08 +03:00
if ( sock - > ops - > compat_ioctl )
ret = sock - > ops - > compat_ioctl ( sock , cmd , arg ) ;
2008-06-03 20:14:03 +04:00
if ( ret = = - ENOIOCTLCMD & &
( cmd > = SIOCIWFIRST & & cmd < = SIOCIWLAST ) )
ret = compat_wext_handle_ioctl ( net , cmd , arg ) ;
2009-11-07 10:10:54 +03:00
if ( ret = = - ENOIOCTLCMD )
ret = compat_sock_ioctl_trans ( file , sock , cmd , arg ) ;
2006-03-22 10:58:08 +03:00
return ret ;
}
# endif
2006-08-08 07:57:31 +04:00
int kernel_bind ( struct socket * sock , struct sockaddr * addr , int addrlen )
{
return sock - > ops - > bind ( sock , addr , addrlen ) ;
}
int kernel_listen ( struct socket * sock , int backlog )
{
return sock - > ops - > listen ( sock , backlog ) ;
}
/*
 * kernel_accept - in-kernel accept.
 *
 * Creates a new socket with sock_create_lite() using the family, type and
 * protocol of the listening socket's sk, then calls ->accept on it.  On
 * success *newsock inherits sock->ops and a reference is taken on the
 * owning module.  If ->accept fails the new socket is released and
 * *newsock is set to NULL.
 *
 * Returns 0 or a positive ->accept result on success, a negative error
 * from sock_create_lite() or ->accept otherwise.
 */
int kernel_accept ( struct socket * sock , struct socket * * newsock , int flags )
{
struct sock * sk = sock - > sk ;
int err ;
err = sock_create_lite ( sk - > sk_family , sk - > sk_type , sk - > sk_protocol ,
newsock ) ;
if ( err < 0 )
goto done ;
err = sock - > ops - > accept ( sock , * newsock , flags ) ;
if ( err < 0 ) {
sock_release ( * newsock ) ;
2007-10-11 08:09:04 +04:00
/* Do not leave the caller with a pointer to the released socket. */
* newsock = NULL ;
2006-08-08 07:57:31 +04:00
goto done ;
}
( * newsock ) - > ops = sock - > ops ;
2008-12-19 06:35:10 +03:00
__module_get ( ( * newsock ) - > ops - > owner ) ;
2006-08-08 07:57:31 +04:00
done :
return err ;
}
/*
 * kernel_connect - in-kernel connect: forwards all arguments to the
 * socket's ->connect operation and returns its result.
 */
int kernel_connect ( struct socket * sock , struct sockaddr * addr , int addrlen ,
2007-02-09 17:25:31 +03:00
int flags )
2006-08-08 07:57:31 +04:00
{
return sock - > ops - > connect ( sock , addr , addrlen , flags ) ;
}
int kernel_getsockname ( struct socket * sock , struct sockaddr * addr ,
int * addrlen )
{
return sock - > ops - > getname ( sock , addr , addrlen , 0 ) ;
}
int kernel_getpeername ( struct socket * sock , struct sockaddr * addr ,
int * addrlen )
{
return sock - > ops - > getname ( sock , addr , addrlen , 1 ) ;
}
int kernel_getsockopt ( struct socket * sock , int level , int optname ,
char * optval , int * optlen )
{
mm_segment_t oldfs = get_fs ( ) ;
int err ;
set_fs ( KERNEL_DS ) ;
if ( level = = SOL_SOCKET )
err = sock_getsockopt ( sock , level , optname , optval , optlen ) ;
else
err = sock - > ops - > getsockopt ( sock , level , optname , optval ,
optlen ) ;
set_fs ( oldfs ) ;
return err ;
}
/*
 * kernel_setsockopt - setsockopt for in-kernel callers.
 *
 * The address limit is raised to KERNEL_DS around the call and restored
 * afterwards.  SOL_SOCKET options go to sock_setsockopt(); every other
 * level goes to the protocol's ->setsockopt.  Returns the handler's
 * result.
 */
int kernel_setsockopt ( struct socket * sock , int level , int optname ,
2009-10-01 03:12:20 +04:00
char * optval , unsigned int optlen )
2006-08-08 07:57:31 +04:00
{
mm_segment_t oldfs = get_fs ( ) ;
int err ;
set_fs ( KERNEL_DS ) ;
if ( level = = SOL_SOCKET )
err = sock_setsockopt ( sock , level , optname , optval , optlen ) ;
else
err = sock - > ops - > setsockopt ( sock , level , optname , optval ,
optlen ) ;
set_fs ( oldfs ) ;
return err ;
}
/*
 * kernel_sendpage - send a page through @sock.
 *
 * Uses the protocol's ->sendpage when it provides one and falls back to
 * sock_no_sendpage() otherwise.  Returns the result of whichever was
 * called.
 */
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
		    size_t size, int flags)
{
	if (!sock->ops->sendpage)
		return sock_no_sendpage(sock, page, offset, size, flags);

	return sock->ops->sendpage(sock, page, offset, size, flags);
}
int kernel_sock_ioctl ( struct socket * sock , int cmd , unsigned long arg )
{
mm_segment_t oldfs = get_fs ( ) ;
int err ;
set_fs ( KERNEL_DS ) ;
err = sock - > ops - > ioctl ( sock , cmd , arg ) ;
set_fs ( oldfs ) ;
return err ;
}
2007-11-13 05:10:39 +03:00
int kernel_sock_shutdown ( struct socket * sock , enum sock_shutdown_cmd how )
{
return sock - > ops - > shutdown ( sock , how ) ;
}
2005-04-17 02:20:36 +04:00
/* Socket-layer entry points exported via EXPORT_SYMBOL(). */
EXPORT_SYMBOL ( sock_create ) ;
EXPORT_SYMBOL ( sock_create_kern ) ;
EXPORT_SYMBOL ( sock_create_lite ) ;
EXPORT_SYMBOL ( sock_map_fd ) ;
EXPORT_SYMBOL ( sock_recvmsg ) ;
EXPORT_SYMBOL ( sock_register ) ;
EXPORT_SYMBOL ( sock_release ) ;
EXPORT_SYMBOL ( sock_sendmsg ) ;
EXPORT_SYMBOL ( sock_unregister ) ;
EXPORT_SYMBOL ( sock_wake_async ) ;
EXPORT_SYMBOL ( sockfd_lookup ) ;
EXPORT_SYMBOL ( kernel_sendmsg ) ;
EXPORT_SYMBOL ( kernel_recvmsg ) ;
2006-08-08 07:57:31 +04:00
/* The kernel_* wrappers. */
EXPORT_SYMBOL ( kernel_bind ) ;
EXPORT_SYMBOL ( kernel_listen ) ;
EXPORT_SYMBOL ( kernel_accept ) ;
EXPORT_SYMBOL ( kernel_connect ) ;
EXPORT_SYMBOL ( kernel_getsockname ) ;
EXPORT_SYMBOL ( kernel_getpeername ) ;
EXPORT_SYMBOL ( kernel_getsockopt ) ;
EXPORT_SYMBOL ( kernel_setsockopt ) ;
EXPORT_SYMBOL ( kernel_sendpage ) ;
EXPORT_SYMBOL ( kernel_sock_ioctl ) ;
2007-11-13 05:10:39 +03:00
EXPORT_SYMBOL ( kernel_sock_shutdown ) ;