2015-07-17 17:38:12 +03:00
/*
* ( C ) 2001 Clemson University and The University of Chicago
*
* Changes by Acxiom Corporation to add protocol version to kernel
* communication , Copyright Acxiom Corporation , 2005.
*
* See COPYING in top - level directory .
*/
# include "protocol.h"
2015-12-04 20:56:14 +03:00
# include "orangefs-kernel.h"
# include "orangefs-dev-proto.h"
# include "orangefs-bufmap.h"
2015-07-17 17:38:12 +03:00
# include <linux/debugfs.h>
# include <linux/slab.h>
/* this file implements the /dev/pvfs2-req device node */

/*
 * Open counter for the request device.  orangefs_devreq_open() only
 * succeeds while this is 0 and then sets it to 1; any other value makes
 * the open fail.  It is read and written there with devreq_mutex held.
 */
static int open_access_count ;
/*
 * Emit, through gossip_err(), a banner telling the user that the request
 * device cannot be opened more than once, how to look for the other
 * opener with lsof, and the current value of open_access_count.
 */
#define DUMP_DEVICE_ERROR()						\
do {									\
	gossip_err(							\
	    " ***************************************************** \n "); \
	gossip_err(							\
	    " ORANGEFS Device Error: You cannot open the device file ");	\
	gossip_err(							\
	    " \n /dev/%s more than once. Please make sure that \n there " \
	    " are no ",							\
	    ORANGEFS_REQDEVICE_NAME);					\
	gossip_err(							\
	    " instances of a program using this device \n currently "	\
	    " running. (You must verify this!) \n ");			\
	gossip_err(							\
	    " For example, you can use the lsof program as follows: \n "); \
	gossip_err(							\
	    " 'lsof | grep %s' (run this as root) \n ",			\
	    ORANGEFS_REQDEVICE_NAME);					\
	gossip_err(							\
	    " open_access_count = %d \n ",				\
	    open_access_count);						\
	gossip_err(							\
	    " ***************************************************** \n "); \
} while (0)
/*
 * Map an op tag onto a hash bucket: the value do_div() returns for
 * tag divided by table_size (taken as an unsigned int), i.e. the
 * remainder of that division.
 */
static int hash_func(__u64 tag, int table_size)
{
	unsigned int nr_buckets = (unsigned int)table_size;

	/*
	 * do_div() writes the quotient back into its first argument;
	 * tag is a by-value copy, so the caller's tag is untouched.
	 */
	return do_div(tag, nr_buckets);
}
2015-11-24 23:12:14 +03:00
static void orangefs_devreq_add_op ( struct orangefs_kernel_op_s * op )
2015-07-17 17:38:12 +03:00
{
int index = hash_func ( op - > tag , hash_table_size ) ;
list_add_tail ( & op - > list , & htable_ops_in_progress [ index ] ) ;
}
2016-02-26 18:21:12 +03:00
/*
* find the op with this tag and remove it from the in progress
* hash table .
*/
2015-11-24 23:12:14 +03:00
static struct orangefs_kernel_op_s * orangefs_devreq_remove_op ( __u64 tag )
2015-07-17 17:38:12 +03:00
{
2015-11-24 23:12:14 +03:00
struct orangefs_kernel_op_s * op , * next ;
2015-07-17 17:38:12 +03:00
int index ;
index = hash_func ( tag , hash_table_size ) ;
spin_lock ( & htable_ops_in_progress_lock ) ;
list_for_each_entry_safe ( op ,
next ,
& htable_ops_in_progress [ index ] ,
list ) {
2016-02-19 02:59:44 +03:00
if ( op - > tag = = tag & & ! op_state_purged ( op ) & &
! op_state_given_up ( op ) ) {
2016-01-23 03:47:47 +03:00
list_del_init ( & op - > list ) ;
2015-07-17 17:38:12 +03:00
spin_unlock ( & htable_ops_in_progress_lock ) ;
return op ;
}
}
spin_unlock ( & htable_ops_in_progress_lock ) ;
return NULL ;
}
2016-03-05 21:17:39 +03:00
/*
 * Flag every superblock on orangefs_superblocks as requiring a remount
 * by setting its mount_pending field to 1.
 *
 * Returns 1 if the list was empty (nothing was flagged), 0 if at least
 * one superblock was flagged.
 */
static int mark_all_pending_mounts(void)
{
	struct orangefs_sb_info_s *sb_info;
	int none_mounted = 1;

	spin_lock(&orangefs_superblocks_lock);
	list_for_each_entry(sb_info, &orangefs_superblocks, list) {
		sb_info->mount_pending = 1;
		none_mounted = 0;
	}
	spin_unlock(&orangefs_superblocks_lock);

	return none_mounted;
}
/*
 * Report whether the file system identified by fsid still has to be
 * remounted.
 *
 * Walks orangefs_superblocks under orangefs_superblocks_lock and
 * returns the mount_pending value of the first superblock whose fs_id
 * equals fsid:
 *    1  the file system needs a remount
 *    0  the file system is already mounted
 *   -1  no superblock with that fs_id is on the list
 */
static int fs_mount_pending(__s32 fsid)
{
	struct orangefs_sb_info_s *sb_info;
	int status = -1;

	spin_lock(&orangefs_superblocks_lock);
	list_for_each_entry(sb_info, &orangefs_superblocks, list) {
		if (sb_info->fs_id != fsid)
			continue;
		status = sb_info->mount_pending;
		break;
	}
	spin_unlock(&orangefs_superblocks_lock);

	return status;
}
2015-11-24 23:12:14 +03:00
static int orangefs_devreq_open ( struct inode * inode , struct file * file )
2015-07-17 17:38:12 +03:00
{
int ret = - EINVAL ;
if ( ! ( file - > f_flags & O_NONBLOCK ) ) {
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
gossip_err ( " %s: device cannot be opened in blocking mode \n " ,
__func__ ) ;
2015-07-17 17:38:12 +03:00
goto out ;
}
ret = - EACCES ;
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG , " client-core: opening device \n " ) ;
2015-07-17 17:38:12 +03:00
mutex_lock ( & devreq_mutex ) ;
if ( open_access_count = = 0 ) {
2016-01-23 03:46:08 +03:00
open_access_count = 1 ;
2016-01-19 20:00:26 +03:00
ret = 0 ;
2015-07-17 17:38:12 +03:00
} else {
DUMP_DEVICE_ERROR ( ) ;
}
mutex_unlock ( & devreq_mutex ) ;
out :
gossip_debug ( GOSSIP_DEV_DEBUG ,
" pvfs2-client-core: open device complete (ret = %d) \n " ,
ret ) ;
return ret ;
}
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
/* Function for read() callers into the device */
2015-11-24 23:12:14 +03:00
static ssize_t orangefs_devreq_read ( struct file * file ,
2015-07-17 17:38:12 +03:00
char __user * buf ,
size_t count , loff_t * offset )
{
2015-11-24 23:12:14 +03:00
struct orangefs_kernel_op_s * op , * temp ;
__s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION ;
static __s32 magic = ORANGEFS_DEVREQ_MAGIC ;
struct orangefs_kernel_op_s * cur_op = NULL ;
2015-11-13 22:26:10 +03:00
unsigned long ret ;
2015-07-17 17:38:12 +03:00
2015-11-13 22:26:10 +03:00
/* We do not support blocking IO. */
2015-07-17 17:38:12 +03:00
if ( ! ( file - > f_flags & O_NONBLOCK ) ) {
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
gossip_err ( " %s: blocking read from client-core. \n " ,
__func__ ) ;
2015-07-17 17:38:12 +03:00
return - EINVAL ;
2015-11-13 22:26:10 +03:00
}
/*
2015-12-15 22:22:06 +03:00
* The client will do an ioctl to find MAX_DEV_REQ_UPSIZE , then
2015-11-13 22:26:10 +03:00
* always read with that size buffer .
*/
2015-12-15 22:22:06 +03:00
if ( count ! = MAX_DEV_REQ_UPSIZE ) {
2015-11-13 22:26:10 +03:00
gossip_err ( " orangefs: client-core tried to read wrong size \n " ) ;
return - EINVAL ;
}
2016-01-23 03:47:47 +03:00
restart :
2015-11-13 22:26:10 +03:00
/* Get next op (if any) from top of list. */
2015-11-24 23:12:14 +03:00
spin_lock ( & orangefs_request_list_lock ) ;
list_for_each_entry_safe ( op , temp , & orangefs_request_list , list ) {
2015-11-13 22:26:10 +03:00
__s32 fsid ;
/* This lock is held past the end of the loop when we break. */
spin_lock ( & op - > lock ) ;
2016-02-19 02:59:44 +03:00
if ( unlikely ( op_state_purged ( op ) | | op_state_given_up ( op ) ) ) {
2016-01-23 03:47:47 +03:00
spin_unlock ( & op - > lock ) ;
continue ;
}
2015-11-13 22:26:10 +03:00
fsid = fsid_of_op ( op ) ;
2015-11-24 23:12:14 +03:00
if ( fsid ! = ORANGEFS_FS_ID_NULL ) {
2015-11-13 22:26:10 +03:00
int ret ;
/* Skip ops whose filesystem needs to be mounted. */
ret = fs_mount_pending ( fsid ) ;
if ( ret = = 1 ) {
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
2016-02-04 21:29:27 +03:00
" %s: mount pending, skipping op tag "
" %llu %s \n " ,
__func__ ,
llu ( op - > tag ) ,
get_opname_string ( op ) ) ;
2015-11-13 22:26:10 +03:00
spin_unlock ( & op - > lock ) ;
continue ;
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
/*
* Skip ops whose filesystem we don ' t know about unless
* it is being mounted .
*/
2015-11-13 22:26:10 +03:00
/* XXX: is there a better way to detect this? */
} else if ( ret = = - 1 & &
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
! ( op - > upcall . type = =
ORANGEFS_VFS_OP_FS_MOUNT | |
op - > upcall . type = =
ORANGEFS_VFS_OP_GETATTR ) ) {
2015-11-13 22:26:10 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
" orangefs: skipping op tag %llu %s \n " ,
llu ( op - > tag ) , get_opname_string ( op ) ) ;
gossip_err (
" orangefs: ERROR: fs_mount_pending %d \n " ,
fsid ) ;
spin_unlock ( & op - > lock ) ;
2015-07-17 17:38:12 +03:00
continue ;
}
}
2015-11-13 22:26:10 +03:00
/*
* Either this op does not pertain to a filesystem , is mounting
* a filesystem , or pertains to a mounted filesystem . Let it
* through .
*/
cur_op = op ;
break ;
}
/*
* At this point we either have a valid op and can continue or have not
* found an op and must ask the client to try again later .
*/
if ( ! cur_op ) {
2015-11-24 23:12:14 +03:00
spin_unlock ( & orangefs_request_list_lock ) ;
2015-11-13 22:26:10 +03:00
return - EAGAIN ;
2015-07-17 17:38:12 +03:00
}
2016-02-26 18:21:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG , " %s: reading op tag %llu %s \n " ,
__func__ ,
llu ( cur_op - > tag ) ,
get_opname_string ( cur_op ) ) ;
2015-07-17 17:38:12 +03:00
2015-11-13 22:26:10 +03:00
/*
* Such an op should never be on the list in the first place . If so , we
* will abort .
*/
if ( op_state_in_progress ( cur_op ) | | op_state_serviced ( cur_op ) ) {
gossip_err ( " orangefs: ERROR: Current op already queued. \n " ) ;
2016-02-19 02:59:44 +03:00
list_del_init ( & cur_op - > list ) ;
2015-07-17 17:38:12 +03:00
spin_unlock ( & cur_op - > lock ) ;
2015-11-24 23:12:14 +03:00
spin_unlock ( & orangefs_request_list_lock ) ;
2015-11-13 22:26:10 +03:00
return - EAGAIN ;
2015-07-17 17:38:12 +03:00
}
2016-02-26 18:21:12 +03:00
2016-01-23 03:47:47 +03:00
list_del_init ( & cur_op - > list ) ;
2015-11-24 23:12:14 +03:00
spin_unlock ( & orangefs_request_list_lock ) ;
2016-01-23 03:47:47 +03:00
2015-11-13 22:26:10 +03:00
spin_unlock ( & cur_op - > lock ) ;
/* Push the upcall out. */
ret = copy_to_user ( buf , & proto_ver , sizeof ( __s32 ) ) ;
if ( ret ! = 0 )
goto error ;
ret = copy_to_user ( buf + sizeof ( __s32 ) , & magic , sizeof ( __s32 ) ) ;
if ( ret ! = 0 )
goto error ;
ret = copy_to_user ( buf + 2 * sizeof ( __s32 ) , & cur_op - > tag , sizeof ( __u64 ) ) ;
if ( ret ! = 0 )
goto error ;
ret = copy_to_user ( buf + 2 * sizeof ( __s32 ) + sizeof ( __u64 ) , & cur_op - > upcall ,
2015-11-24 23:12:14 +03:00
sizeof ( struct orangefs_upcall_s ) ) ;
2015-11-13 22:26:10 +03:00
if ( ret ! = 0 )
goto error ;
2016-01-23 03:47:47 +03:00
spin_lock ( & htable_ops_in_progress_lock ) ;
spin_lock ( & cur_op - > lock ) ;
if ( unlikely ( op_state_given_up ( cur_op ) ) ) {
spin_unlock ( & cur_op - > lock ) ;
spin_unlock ( & htable_ops_in_progress_lock ) ;
2016-02-19 02:59:44 +03:00
complete ( & cur_op - > waitq ) ;
2016-01-23 03:47:47 +03:00
goto restart ;
}
/*
* Set the operation to be in progress and move it between lists since
* it has been sent to the client .
*/
set_op_state_inprogress ( cur_op ) ;
2016-03-03 21:46:48 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
" %s: 1 op:%s: op_state:%d: process:%s: \n " ,
__func__ ,
get_opname_string ( cur_op ) ,
cur_op - > op_state ,
current - > comm ) ;
2016-01-23 03:47:47 +03:00
orangefs_devreq_add_op ( cur_op ) ;
spin_unlock ( & cur_op - > lock ) ;
spin_unlock ( & htable_ops_in_progress_lock ) ;
2015-11-13 22:26:10 +03:00
/* The client only asks to read one size buffer. */
2015-12-15 22:22:06 +03:00
return MAX_DEV_REQ_UPSIZE ;
2015-11-13 22:26:10 +03:00
error :
/*
* We were unable to copy the op data to the client . Put the op back in
* list . If client has crashed , the op will be purged later when the
* device is released .
*/
gossip_err ( " orangefs: Failed to copy data to user space \n " ) ;
2015-11-24 23:12:14 +03:00
spin_lock ( & orangefs_request_list_lock ) ;
2015-11-13 22:26:10 +03:00
spin_lock ( & cur_op - > lock ) ;
2016-01-23 03:47:47 +03:00
if ( likely ( ! op_state_given_up ( cur_op ) ) ) {
set_op_state_waiting ( cur_op ) ;
2016-03-03 21:46:48 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
" %s: 2 op:%s: op_state:%d: process:%s: \n " ,
__func__ ,
get_opname_string ( cur_op ) ,
cur_op - > op_state ,
current - > comm ) ;
2016-01-23 03:47:47 +03:00
list_add ( & cur_op - > list , & orangefs_request_list ) ;
2016-02-19 02:59:44 +03:00
spin_unlock ( & cur_op - > lock ) ;
} else {
spin_unlock ( & cur_op - > lock ) ;
complete ( & cur_op - > waitq ) ;
2016-01-23 03:47:47 +03:00
}
2015-11-24 23:12:14 +03:00
spin_unlock ( & orangefs_request_list_lock ) ;
2015-11-13 22:26:10 +03:00
return - EFAULT ;
2015-07-17 17:38:12 +03:00
}
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
/*
2016-01-13 19:18:12 +03:00
* Function for writev ( ) callers into the device .
*
* Userspace should have written :
* - __u32 version
* - __u32 magic
* - __u64 tag
* - struct orangefs_downcall_s
* - trailer buffer ( in the case of READDIR operations )
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
*/
2016-01-13 19:18:12 +03:00
static ssize_t orangefs_devreq_write_iter ( struct kiocb * iocb ,
struct iov_iter * iter )
2015-07-17 17:38:12 +03:00
{
2016-01-13 19:18:12 +03:00
ssize_t ret ;
2015-11-24 23:12:14 +03:00
struct orangefs_kernel_op_s * op = NULL ;
2016-01-13 19:18:12 +03:00
struct {
__u32 version ;
__u32 magic ;
__u64 tag ;
} head ;
int total = ret = iov_iter_count ( iter ) ;
int n ;
int downcall_size = sizeof ( struct orangefs_downcall_s ) ;
int head_size = sizeof ( head ) ;
gossip_debug ( GOSSIP_DEV_DEBUG , " %s: total:%d: ret:%zd: \n " ,
__func__ ,
total ,
ret ) ;
2015-07-17 17:38:12 +03:00
2016-01-13 19:18:12 +03:00
if ( total < MAX_DEV_REQ_DOWNSIZE ) {
2016-01-19 20:04:40 +03:00
gossip_err ( " %s: total:%d: must be at least:%u: \n " ,
2016-01-13 19:18:12 +03:00
__func__ ,
total ,
2016-01-19 20:04:40 +03:00
( unsigned int ) MAX_DEV_REQ_DOWNSIZE ) ;
2016-01-23 03:47:47 +03:00
return - EFAULT ;
2015-07-17 17:38:12 +03:00
}
2016-01-13 19:18:12 +03:00
n = copy_from_iter ( & head , head_size , iter ) ;
if ( n < head_size ) {
gossip_err ( " %s: failed to copy head. \n " , __func__ ) ;
2016-01-23 03:47:47 +03:00
return - EFAULT ;
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
}
2016-01-13 19:18:12 +03:00
if ( head . version < ORANGEFS_MINIMUM_USERSPACE_VERSION ) {
gossip_err ( " %s: userspace claims version "
" %d, minimum version required: %d. \n " ,
__func__ ,
head . version ,
ORANGEFS_MINIMUM_USERSPACE_VERSION ) ;
2016-01-23 03:47:47 +03:00
return - EPROTO ;
2015-07-17 17:38:12 +03:00
}
2016-01-13 19:18:12 +03:00
if ( head . magic ! = ORANGEFS_DEVREQ_MAGIC ) {
gossip_err ( " Error: Device magic number does not match. \n " ) ;
2016-01-23 03:47:47 +03:00
return - EPROTO ;
2016-01-13 19:18:12 +03:00
}
2015-07-17 17:38:12 +03:00
2016-02-26 18:21:12 +03:00
/* remove the op from the in progress hash table */
2016-01-13 19:18:12 +03:00
op = orangefs_devreq_remove_op ( head . tag ) ;
if ( ! op ) {
gossip_err ( " WARNING: No one's waiting for tag %llu \n " ,
llu ( head . tag ) ) ;
2016-01-23 03:47:47 +03:00
return ret ;
2016-01-13 19:18:12 +03:00
}
2015-07-17 17:38:12 +03:00
2016-01-13 19:18:12 +03:00
n = copy_from_iter ( & op - > downcall , downcall_size , iter ) ;
if ( n ! = downcall_size ) {
gossip_err ( " %s: failed to copy downcall. \n " , __func__ ) ;
2016-02-19 02:53:41 +03:00
goto Efault ;
2015-07-17 17:38:12 +03:00
}
2016-01-13 19:18:12 +03:00
if ( op - > downcall . status )
goto wakeup ;
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
2016-01-13 19:18:12 +03:00
/*
* We ' ve successfully peeled off the head and the downcall .
* Something has gone awry if total doesn ' t equal the
* sum of head_size , downcall_size and trailer_size .
*/
if ( ( head_size + downcall_size + op - > downcall . trailer_size ) ! = total ) {
gossip_err ( " %s: funky write, head_size:%d "
" : downcall_size:%d: trailer_size:%lld "
" : total size:%d: \n " ,
__func__ ,
head_size ,
downcall_size ,
op - > downcall . trailer_size ,
total ) ;
2016-02-19 02:53:41 +03:00
goto Efault ;
2016-01-13 19:18:12 +03:00
}
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
2016-01-13 19:18:12 +03:00
/* Only READDIR operations should have trailers. */
if ( ( op - > downcall . type ! = ORANGEFS_VFS_OP_READDIR ) & &
( op - > downcall . trailer_size ! = 0 ) ) {
gossip_err ( " %s: %x operation with trailer. " ,
__func__ ,
op - > downcall . type ) ;
2016-02-19 02:53:41 +03:00
goto Efault ;
2016-01-13 19:18:12 +03:00
}
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
2016-01-13 19:18:12 +03:00
/* READDIR operations should always have trailers. */
if ( ( op - > downcall . type = = ORANGEFS_VFS_OP_READDIR ) & &
( op - > downcall . trailer_size = = 0 ) ) {
gossip_err ( " %s: %x operation with no trailer. " ,
__func__ ,
op - > downcall . type ) ;
2016-02-19 02:53:41 +03:00
goto Efault ;
2016-01-13 19:18:12 +03:00
}
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
2016-01-13 19:18:12 +03:00
if ( op - > downcall . type ! = ORANGEFS_VFS_OP_READDIR )
goto wakeup ;
2015-07-17 17:38:12 +03:00
2016-01-13 19:18:12 +03:00
op - > downcall . trailer_buf =
vmalloc ( op - > downcall . trailer_size ) ;
if ( op - > downcall . trailer_buf = = NULL ) {
gossip_err ( " %s: failed trailer vmalloc. \n " ,
__func__ ) ;
2016-02-19 02:53:41 +03:00
goto Enomem ;
2016-01-13 19:18:12 +03:00
}
memset ( op - > downcall . trailer_buf , 0 , op - > downcall . trailer_size ) ;
n = copy_from_iter ( op - > downcall . trailer_buf ,
op - > downcall . trailer_size ,
iter ) ;
if ( n ! = op - > downcall . trailer_size ) {
gossip_err ( " %s: failed to copy trailer. \n " , __func__ ) ;
vfree ( op - > downcall . trailer_buf ) ;
2016-02-19 02:53:41 +03:00
goto Efault ;
2016-01-13 19:18:12 +03:00
}
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
2016-01-13 19:18:12 +03:00
wakeup :
2016-01-23 21:45:46 +03:00
/*
2016-02-26 22:39:08 +03:00
* Return to vfs waitqueue , and back to service_operation
* through wait_for_matching_downcall .
2016-01-23 21:45:46 +03:00
*/
spin_lock ( & op - > lock ) ;
2016-02-19 02:53:41 +03:00
if ( unlikely ( op_is_cancel ( op ) ) ) {
2016-01-23 21:45:46 +03:00
spin_unlock ( & op - > lock ) ;
2016-02-12 07:07:19 +03:00
put_cancel ( op ) ;
2016-02-19 02:53:41 +03:00
} else if ( unlikely ( op_state_given_up ( op ) ) ) {
spin_unlock ( & op - > lock ) ;
2016-02-19 02:59:44 +03:00
complete ( & op - > waitq ) ;
2016-02-19 02:53:41 +03:00
} else {
set_op_state_serviced ( op ) ;
2016-03-03 21:46:48 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
" %s: op:%s: op_state:%d: process:%s: \n " ,
__func__ ,
get_opname_string ( op ) ,
op - > op_state ,
current - > comm ) ;
2016-02-19 02:53:41 +03:00
spin_unlock ( & op - > lock ) ;
}
2016-01-13 19:18:12 +03:00
return ret ;
2016-01-23 03:47:47 +03:00
2016-02-19 02:53:41 +03:00
Efault :
op - > downcall . status = - ( ORANGEFS_ERROR_BIT | 9 ) ;
ret = - EFAULT ;
goto wakeup ;
Enomem :
op - > downcall . status = - ( ORANGEFS_ERROR_BIT | 8 ) ;
ret = - ENOMEM ;
goto wakeup ;
2015-07-17 17:38:12 +03:00
}
/*
* NOTE : gets called when the last reference to this device is dropped .
* Using the open_access_count variable , we enforce a reference count
* on this file so that it can be opened by only one process at a time .
* the devreq_mutex is used to make sure all i / o has completed
2015-11-24 23:12:14 +03:00
* before we call orangefs_bufmap_finalize , and similar such tricky
2015-07-17 17:38:12 +03:00
* situations
*/
2015-11-24 23:12:14 +03:00
static int orangefs_devreq_release ( struct inode * inode , struct file * file )
2015-07-17 17:38:12 +03:00
{
int unmounted = 0 ;
gossip_debug ( GOSSIP_DEV_DEBUG ,
" %s:pvfs2-client-core: exiting, closing device \n " ,
__func__ ) ;
mutex_lock ( & devreq_mutex ) ;
2016-02-14 05:01:21 +03:00
orangefs_bufmap_finalize ( ) ;
2015-07-17 17:38:12 +03:00
2016-01-23 03:46:08 +03:00
open_access_count = - 1 ;
2015-07-17 17:38:12 +03:00
unmounted = mark_all_pending_mounts ( ) ;
2015-11-24 23:12:14 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG , " ORANGEFS Device Close: Filesystem(s) %s \n " ,
2015-07-17 17:38:12 +03:00
( unmounted ? " UNMOUNTED " : " MOUNTED " ) ) ;
purge_waiting_ops ( ) ;
purge_inprogress_ops ( ) ;
2016-02-14 05:01:21 +03:00
orangefs_bufmap_run_down ( ) ;
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
" pvfs2-client-core: device close complete \n " ) ;
2016-01-23 03:46:08 +03:00
open_access_count = 0 ;
mutex_unlock ( & devreq_mutex ) ;
2015-07-17 17:38:12 +03:00
return 0 ;
}
/*
 * Report whether client-core is alive, judged by the access count we
 * maintain on the device. The count is read under devreq_mutex.
 *
 * Returns 0 when open_access_count is exactly 1, -EIO otherwise.
 */
int is_daemon_in_service(void)
{
	int status = -EIO;

	mutex_lock(&devreq_mutex);
	if (open_access_count == 1)
		status = 0;
	mutex_unlock(&devreq_mutex);

	return status;
}
2016-02-12 07:07:19 +03:00
/*
 * Boolean form of the access-count check: true when open_access_count is
 * exactly 1. Unlike is_daemon_in_service(), this reads the count without
 * taking devreq_mutex.
 */
bool __is_daemon_in_service(void)
{
	const bool device_open_once = (open_access_count == 1);

	return device_open_once;
}
2015-07-17 17:38:12 +03:00
static inline long check_ioctl_command ( unsigned int command )
{
/* Check for valid ioctl codes */
2015-11-24 23:12:14 +03:00
if ( _IOC_TYPE ( command ) ! = ORANGEFS_DEV_MAGIC ) {
2015-07-17 17:38:12 +03:00
gossip_err ( " device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x] \n " ,
command ,
_IOC_TYPE ( command ) ,
2015-11-24 23:12:14 +03:00
ORANGEFS_DEV_MAGIC ) ;
2015-07-17 17:38:12 +03:00
return - EINVAL ;
}
/* and valid ioctl commands */
2015-11-24 23:12:14 +03:00
if ( _IOC_NR ( command ) > = ORANGEFS_DEV_MAXNR | | _IOC_NR ( command ) < = 0 ) {
2015-07-17 17:38:12 +03:00
gossip_err ( " Invalid ioctl command number [%d >= %d] \n " ,
2015-11-24 23:12:14 +03:00
_IOC_NR ( command ) , ORANGEFS_DEV_MAXNR ) ;
2015-07-17 17:38:12 +03:00
return - ENOIOCTLCMD ;
}
return 0 ;
}
static long dispatch_ioctl_command ( unsigned int command , unsigned long arg )
{
2015-11-24 23:12:14 +03:00
static __s32 magic = ORANGEFS_DEVREQ_MAGIC ;
2015-12-15 22:22:06 +03:00
static __s32 max_up_size = MAX_DEV_REQ_UPSIZE ;
static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE ;
2015-11-24 23:12:14 +03:00
struct ORANGEFS_dev_map_desc user_desc ;
2015-07-17 17:38:12 +03:00
int ret = 0 ;
struct dev_mask_info_s mask_info = { 0 } ;
struct dev_mask2_info_s mask2_info = { 0 , 0 } ;
int upstream_kmod = 1 ;
2016-03-26 02:56:34 +03:00
struct orangefs_sb_info_s * orangefs_sb ;
2015-07-17 17:38:12 +03:00
/* mtmoore: add locking here */
switch ( command ) {
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_GET_MAGIC :
2015-07-17 17:38:12 +03:00
return ( ( put_user ( magic , ( __s32 __user * ) arg ) = = - EFAULT ) ?
- EIO :
0 ) ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_GET_MAX_UPSIZE :
2015-07-17 17:38:12 +03:00
return ( ( put_user ( max_up_size ,
( __s32 __user * ) arg ) = = - EFAULT ) ?
- EIO :
0 ) ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_GET_MAX_DOWNSIZE :
2015-07-17 17:38:12 +03:00
return ( ( put_user ( max_down_size ,
( __s32 __user * ) arg ) = = - EFAULT ) ?
- EIO :
0 ) ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_MAP :
2015-07-17 17:38:12 +03:00
ret = copy_from_user ( & user_desc ,
2015-11-24 23:12:14 +03:00
( struct ORANGEFS_dev_map_desc __user * )
2015-07-17 17:38:12 +03:00
arg ,
2015-11-24 23:12:14 +03:00
sizeof ( struct ORANGEFS_dev_map_desc ) ) ;
2016-02-14 05:01:21 +03:00
/* WTF -EIO and not -EFAULT? */
return ret ? - EIO : orangefs_bufmap_initialize ( & user_desc ) ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_REMOUNT_ALL :
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
" %s: got ORANGEFS_DEV_REMOUNT_ALL \n " ,
__func__ ) ;
2015-07-17 17:38:12 +03:00
/*
2015-11-24 23:12:14 +03:00
* remount all mounted orangefs volumes to regain the lost
2015-07-17 17:38:12 +03:00
* dynamic mount tables ( if any ) - - NOTE : this is done
* without keeping the superblock list locked due to the
2016-02-25 00:54:27 +03:00
* upcall / downcall waiting . also , the request mutex is
2015-07-17 17:38:12 +03:00
* used to ensure that no operations will be serviced until
* all of the remounts are serviced ( to avoid ops between
* mounts to fail )
*/
ret = mutex_lock_interruptible ( & request_mutex ) ;
if ( ret < 0 )
return ret ;
gossip_debug ( GOSSIP_DEV_DEBUG ,
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
" %s: priority remount in progress \n " ,
__func__ ) ;
2016-03-26 02:56:34 +03:00
spin_lock ( & orangefs_superblocks_lock ) ;
list_for_each_entry ( orangefs_sb , & orangefs_superblocks , list ) {
/*
* We have to drop the spinlock , so entries can be
* removed . They can ' t be freed , though , so we just
* keep the forward pointers and zero the back ones -
* that way we can get to the rest of the list .
*/
if ( ! orangefs_sb - > list . prev )
continue ;
gossip_debug ( GOSSIP_DEV_DEBUG ,
" %s: Remounting SB %p \n " ,
__func__ ,
orangefs_sb ) ;
spin_unlock ( & orangefs_superblocks_lock ) ;
ret = orangefs_remount ( orangefs_sb ) ;
spin_lock ( & orangefs_superblocks_lock ) ;
if ( ret ) {
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
2016-03-26 02:56:34 +03:00
" SB %p remount failed \n " ,
2015-11-24 23:12:14 +03:00
orangefs_sb ) ;
2016-03-26 02:56:34 +03:00
break ;
2015-07-17 17:38:12 +03:00
}
}
2016-03-26 02:56:34 +03:00
spin_unlock ( & orangefs_superblocks_lock ) ;
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
" %s: priority remount complete \n " ,
__func__ ) ;
2015-07-17 17:38:12 +03:00
mutex_unlock ( & request_mutex ) ;
return ret ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_UPSTREAM :
2015-07-17 17:38:12 +03:00
ret = copy_to_user ( ( void __user * ) arg ,
& upstream_kmod ,
sizeof ( upstream_kmod ) ) ;
if ( ret ! = 0 )
return - EIO ;
else
return ret ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_CLIENT_MASK :
2015-07-17 17:38:12 +03:00
ret = copy_from_user ( & mask2_info ,
( void __user * ) arg ,
sizeof ( struct dev_mask2_info_s ) ) ;
if ( ret ! = 0 )
return - EIO ;
client_debug_mask . mask1 = mask2_info . mask1_value ;
client_debug_mask . mask2 = mask2_info . mask2_value ;
pr_info ( " %s: client debug mask has been been received "
" :%llx: :%llx: \n " ,
__func__ ,
( unsigned long long ) client_debug_mask . mask1 ,
( unsigned long long ) client_debug_mask . mask2 ) ;
return ret ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_CLIENT_STRING :
2015-07-17 17:38:12 +03:00
ret = copy_from_user ( & client_debug_array_string ,
( void __user * ) arg ,
2015-11-24 23:12:14 +03:00
ORANGEFS_MAX_DEBUG_STRING_LEN ) ;
2016-03-14 22:28:34 +03:00
/*
* The real client - core makes an effort to ensure
* that actual strings that aren ' t too long to fit in
* this buffer is what we get here . We ' re going to use
* string functions on the stuff we got , so we ' ll make
* this extra effort to try and keep from
* flowing out of this buffer when we use the string
* functions , even if somehow the stuff we end up
* with here is garbage .
*/
client_debug_array_string [ ORANGEFS_MAX_DEBUG_STRING_LEN - 1 ] =
' \0 ' ;
2015-07-17 17:38:12 +03:00
if ( ret ! = 0 ) {
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
pr_info ( " %s: CLIENT_STRING: copy_from_user failed \n " ,
2015-07-17 17:38:12 +03:00
__func__ ) ;
return - EIO ;
}
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
pr_info ( " %s: client debug array string has been received. \n " ,
2015-07-17 17:38:12 +03:00
__func__ ) ;
if ( ! help_string_initialized ) {
/* Free the "we don't know yet" default string... */
kfree ( debug_help_string ) ;
/* build a proper debug help string */
if ( orangefs_prepare_debugfs_help_string ( 0 ) ) {
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
gossip_err ( " %s: no debug help string \n " ,
2015-07-17 17:38:12 +03:00
__func__ ) ;
return - EIO ;
}
/* Replace the boilerplate boot-time debug-help file. */
debugfs_remove ( help_file_dentry ) ;
help_file_dentry =
debugfs_create_file (
ORANGEFS_KMOD_DEBUG_HELP_FILE ,
0444 ,
debug_dir ,
debug_help_string ,
& debug_help_fops ) ;
if ( ! help_file_dentry ) {
gossip_err ( " %s: debugfs_create_file failed for "
" :%s:! \n " ,
__func__ ,
ORANGEFS_KMOD_DEBUG_HELP_FILE ) ;
return - EIO ;
}
}
debug_mask_to_string ( & client_debug_mask , 1 ) ;
debugfs_remove ( client_debug_dentry ) ;
2015-11-24 23:12:14 +03:00
orangefs_client_debug_init ( ) ;
2015-07-17 17:38:12 +03:00
help_string_initialized + + ;
return ret ;
2015-11-24 23:12:14 +03:00
case ORANGEFS_DEV_DEBUG :
2015-07-17 17:38:12 +03:00
ret = copy_from_user ( & mask_info ,
( void __user * ) arg ,
sizeof ( mask_info ) ) ;
if ( ret ! = 0 )
return - EIO ;
if ( mask_info . mask_type = = KERNEL_MASK ) {
if ( ( mask_info . mask_value = = 0 )
& & ( kernel_mask_set_mod_init ) ) {
/*
* the kernel debug mask was set when the
* kernel module was loaded ; don ' t override
* it if the client - core was started without
2015-11-24 23:12:14 +03:00
* a value for ORANGEFS_KMODMASK .
2015-07-17 17:38:12 +03:00
*/
return 0 ;
}
debug_mask_to_string ( & mask_info . mask_value ,
mask_info . mask_type ) ;
gossip_debug_mask = mask_info . mask_value ;
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
pr_info ( " %s: kernel debug mask has been modified to "
2015-07-17 17:38:12 +03:00
" :%s: :%llx: \n " ,
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
__func__ ,
2015-07-17 17:38:12 +03:00
kernel_debug_string ,
( unsigned long long ) gossip_debug_mask ) ;
} else if ( mask_info . mask_type = = CLIENT_MASK ) {
debug_mask_to_string ( & mask_info . mask_value ,
mask_info . mask_type ) ;
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
pr_info ( " %s: client debug mask has been modified to "
2015-07-17 17:38:12 +03:00
" :%s: :%llx: \n " ,
Orangefs: de-uglify orangefs_devreq_writev, and devorangefs-req.c in general
AV dislikes many parts of orangefs_devreq_writev. Besides making
orangefs_devreq_writev more easily readable and better commented,
this patch makes an effort to address some of the problems:
> The 5th is quietly ignored unless trailer_size is positive and
> status is zero. If trailer_size > 0 && status == 0, you verify that
> the length of the 5th segment is no more than trailer_size and copy
> it to vmalloc'ed buffer. Without bothering to zero the rest of that
> buffer out.
It was just wrong to allow a 5th segment that is not exactly equal to
trailer_size. Now that that's fixed, there's nothing to zero out in
the vmalloced buffer - it is exactly the right size to hold the
5th segment.
> Another API bogosity: when the 5th segment is present, successful writev()
> returns the sum of sizes of the first 4.
Added size of 5th segment to writev return...
> if concatenation of the first 4 segments is longer than
> 16 + sizeof(struct pvfs2_downcall_s) by no more than sizeof(long) => whine
> and proceed with garbage.
If 4th segment isn't exactly sizeof(struct pvfs2_downcall_s), whine and fail.
> if the 32bit value 4 bytes into op->downcall is zero and 64bit
> value following it is non-zero, the latter is interpreted as the size of
> trailer data.
The latter is what userspace claimed was the length of the trailer data.
The kernel module now compares it to the trailer iovec's iov_len as a
sanity check.
> if there's no trailer, the 5th segment (if present) is completely ignored.
Whine and fail if there should be no trailer, yet a 5th segment is present.
> if vmalloc fails, act as if status (32bit at offset 5 into
> op->downcall) had been -ENOMEM and don't look at the 5th segment at all.
whine and fail with -ENOMEM.
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
2015-12-12 00:45:03 +03:00
__func__ ,
2015-07-17 17:38:12 +03:00
client_debug_string ,
llu ( mask_info . mask_value ) ) ;
} else {
gossip_lerr ( " Invalid mask type.... \n " ) ;
return - EINVAL ;
}
return ret ;
default :
return - ENOIOCTLCMD ;
}
return - ENOIOCTLCMD ;
}
2015-11-24 23:12:14 +03:00
/*
 * Native ioctl entry point for the request device.
 *
 * The command is validated with check_ioctl_command() first; a negative
 * result is returned to the caller unchanged.  Otherwise the command and
 * its argument are handed to dispatch_ioctl_command() and that result is
 * returned.  Both results are narrowed through int before being returned.
 */
static long orangefs_devreq_ioctl(struct file *file,
				  unsigned int command, unsigned long arg)
{
	long check = check_ioctl_command(command);

	/* properly constructed command: let the dispatcher handle it */
	if (check >= 0)
		return (int)dispatch_ioctl_command(command, arg);

	return (int)check;
}
# ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
2015-11-24 23:12:14 +03:00
/* Compat structure for the ORANGEFS_DEV_MAP ioctl */
struct ORANGEFS_dev_map_desc32 {
2015-07-17 17:38:12 +03:00
/* 32-bit user pointer; widened with compat_ptr() in translate_dev_map26() */
compat_uptr_t ptr ;
/*
 * The three fields below are copied unchanged into the same-named
 * fields of struct ORANGEFS_dev_map_desc by translate_dev_map26().
 */
__s32 total_size ;
__s32 size ;
__s32 count ;
} ;
static unsigned long translate_dev_map26 ( unsigned long args , long * error )
{
2015-11-24 23:12:14 +03:00
struct ORANGEFS_dev_map_desc32 __user * p32 = ( void __user * ) args ;
2015-07-17 17:38:12 +03:00
/*
* Depending on the architecture , allocate some space on the
* user - call - stack based on our expected layout .
*/
2015-11-24 23:12:14 +03:00
struct ORANGEFS_dev_map_desc __user * p =
2015-07-17 17:38:12 +03:00
compat_alloc_user_space ( sizeof ( * p ) ) ;
2015-07-28 20:27:51 +03:00
compat_uptr_t addr ;
2015-07-17 17:38:12 +03:00
* error = 0 ;
/* get the ptr from the 32 bit user-space */
if ( get_user ( addr , & p32 - > ptr ) )
goto err ;
/* try to put that into a 64-bit layout */
if ( put_user ( compat_ptr ( addr ) , & p - > ptr ) )
goto err ;
/* copy the remaining fields */
if ( copy_in_user ( & p - > total_size , & p32 - > total_size , sizeof ( __s32 ) ) )
goto err ;
if ( copy_in_user ( & p - > size , & p32 - > size , sizeof ( __s32 ) ) )
goto err ;
if ( copy_in_user ( & p - > count , & p32 - > count , sizeof ( __s32 ) ) )
goto err ;
return ( unsigned long ) p ;
err :
* error = - EFAULT ;
return 0 ;
}
/*
 * ioctl handler for 32-bit user-space applications when the kernel
 * module is compiled as a 64-bit one
 */
2015-11-24 23:12:14 +03:00
static long orangefs_devreq_compat_ioctl ( struct file * filp , unsigned int cmd ,
2015-07-17 17:38:12 +03:00
unsigned long args )
{
long ret ;
unsigned long arg = args ;
/* Check for properly constructed commands */
ret = check_ioctl_command ( cmd ) ;
if ( ret < 0 )
return ret ;
2015-11-24 23:12:14 +03:00
if ( cmd = = ORANGEFS_DEV_MAP ) {
2015-07-17 17:38:12 +03:00
/*
* convert the arguments to what we expect internally
* in kernel space
*/
arg = translate_dev_map26 ( args , & ret ) ;
if ( ret < 0 ) {
gossip_err ( " Could not translate dev map \n " ) ;
return ret ;
}
}
/* no other ioctl requires translation */
return dispatch_ioctl_command ( cmd , arg ) ;
}
2015-07-24 17:37:15 +03:00
# endif /* CONFIG_COMPAT is in .config */
2015-07-17 17:38:12 +03:00
/* the assigned character device major number */
2015-11-24 23:12:14 +03:00
/*
 * Set in orangefs_dev_init() from register_chrdev()'s return value and
 * handed back to unregister_chrdev() in orangefs_dev_cleanup().
 */
static int orangefs_dev_major ;
2015-07-17 17:38:12 +03:00
/*
2015-11-24 23:12:14 +03:00
* Initialize orangefs device specific state :
2015-07-17 17:38:12 +03:00
* Must be called at module load time only
*/
2015-11-24 23:12:14 +03:00
int orangefs_dev_init ( void )
2015-07-17 17:38:12 +03:00
{
2015-11-24 23:12:14 +03:00
/* register orangefs-req device */
orangefs_dev_major = register_chrdev ( 0 ,
ORANGEFS_REQDEVICE_NAME ,
& orangefs_devreq_file_operations ) ;
if ( orangefs_dev_major < 0 ) {
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
" Failed to register /dev/%s (error %d) \n " ,
2015-11-24 23:12:14 +03:00
ORANGEFS_REQDEVICE_NAME , orangefs_dev_major ) ;
return orangefs_dev_major ;
2015-07-17 17:38:12 +03:00
}
gossip_debug ( GOSSIP_DEV_DEBUG ,
" *** /dev/%s character device registered *** \n " ,
2015-11-24 23:12:14 +03:00
ORANGEFS_REQDEVICE_NAME ) ;
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG , " 'mknod /dev/%s c %d 0'. \n " ,
2015-11-24 23:12:14 +03:00
ORANGEFS_REQDEVICE_NAME , orangefs_dev_major ) ;
2015-07-17 17:38:12 +03:00
return 0 ;
}
2015-11-24 23:12:14 +03:00
void orangefs_dev_cleanup ( void )
2015-07-17 17:38:12 +03:00
{
2015-11-24 23:12:14 +03:00
unregister_chrdev ( orangefs_dev_major , ORANGEFS_REQDEVICE_NAME ) ;
2015-07-17 17:38:12 +03:00
gossip_debug ( GOSSIP_DEV_DEBUG ,
" *** /dev/%s character device unregistered *** \n " ,
2015-11-24 23:12:14 +03:00
ORANGEFS_REQDEVICE_NAME ) ;
2015-07-17 17:38:12 +03:00
}
2015-11-24 23:12:14 +03:00
static unsigned int orangefs_devreq_poll ( struct file * file ,
2015-07-17 17:38:12 +03:00
struct poll_table_struct * poll_table )
{
int poll_revent_mask = 0 ;
2016-01-19 20:03:05 +03:00
poll_wait ( file , & orangefs_request_list_waitq , poll_table ) ;
2015-07-17 17:38:12 +03:00
2016-01-19 20:03:05 +03:00
if ( ! list_empty ( & orangefs_request_list ) )
poll_revent_mask | = POLL_IN ;
2015-07-17 17:38:12 +03:00
return poll_revent_mask ;
}
2015-11-24 23:12:14 +03:00
const struct file_operations orangefs_devreq_file_operations = {
2015-07-17 17:38:12 +03:00
. owner = THIS_MODULE ,
2015-11-24 23:12:14 +03:00
. read = orangefs_devreq_read ,
. write_iter = orangefs_devreq_write_iter ,
. open = orangefs_devreq_open ,
. release = orangefs_devreq_release ,
. unlocked_ioctl = orangefs_devreq_ioctl ,
2015-07-17 17:38:12 +03:00
# ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
2015-11-24 23:12:14 +03:00
. compat_ioctl = orangefs_devreq_compat_ioctl ,
2015-07-17 17:38:12 +03:00
# endif
2015-11-24 23:12:14 +03:00
. poll = orangefs_devreq_poll
2015-07-17 17:38:12 +03:00
} ;