2006-03-30 15:15:30 +02:00
/*
 * "splice": joining two ropes together by interweaving their strands.
 *
 * This is the "extended pipe" functionality, where a pipe is used as
 * an arbitrary in-memory buffer. Think of a pipe as a small kernel
 * buffer that you can use to transfer data from one end to the other.
 *
 * The traditional unix read/write is extended with a "splice()" operation
 * that transfers data buffers to or from a pipe buffer.
 *
 * Named by Larry McVoy, original implementation from Linus, extended by
 * Jens to support splicing to files, network, direct splicing, etc and
 * fixing lots of bugs.
 *
 * Copyright (C) 2005-2006 Jens Axboe <axboe@suse.de>
 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
 *
 */
# include <linux/fs.h>
# include <linux/file.h>
# include <linux/pagemap.h>
# include <linux/pipe_fs_i.h>
# include <linux/mm_inline.h>
2006-03-30 15:16:46 +02:00
# include <linux/swap.h>
2006-04-02 23:04:46 +02:00
# include <linux/writeback.h>
# include <linux/buffer_head.h>
2006-03-30 23:06:13 -05:00
# include <linux/module.h>
2006-04-02 23:04:46 +02:00
# include <linux/syscalls.h>
2006-03-30 15:15:30 +02:00
/*
 * Per-call state handed to the splice actors.
 */
struct splice_desc {
	unsigned int len;	/* length of the chunk currently being handled */
	unsigned int total_len;	/* bytes that still remain to be transferred */
	unsigned int flags;	/* splice flags */
	struct file *file;	/* file to read/write */
	loff_t pos;		/* file position */
};
2006-04-02 23:05:09 +02:00
/*
* Attempt to steal a page from a pipe buffer . This should perhaps go into
* a vm helper function , it ' s already simplified quite a bit by the
* addition of remove_mapping ( ) . If success is returned , the caller may
* attempt to reuse this page for another destination .
*/
2006-03-30 15:16:46 +02:00
static int page_cache_pipe_buf_steal ( struct pipe_inode_info * info ,
struct pipe_buffer * buf )
{
struct page * page = buf - > page ;
2006-04-02 23:04:46 +02:00
struct address_space * mapping = page_mapping ( page ) ;
2006-03-30 15:16:46 +02:00
WARN_ON ( ! PageLocked ( page ) ) ;
WARN_ON ( ! PageUptodate ( page ) ) ;
2006-04-02 23:10:32 +02:00
/*
* At least for ext2 with nobh option , we need to wait on writeback
* completing on this page , since we ' ll remove it from the pagecache .
* Otherwise truncate wont wait on the page , allowing the disk
* blocks to be reused by someone else before we actually wrote our
* data to them . fs corruption ensues .
*/
wait_on_page_writeback ( page ) ;
2006-04-02 23:04:46 +02:00
if ( PagePrivate ( page ) )
try_to_release_page ( page , mapping_gfp_mask ( mapping ) ) ;
if ( ! remove_mapping ( mapping , page ) )
2006-03-30 15:16:46 +02:00
return 1 ;
2006-04-02 23:11:04 +02:00
buf - > flags | = PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU ;
2006-03-30 15:16:46 +02:00
return 0 ;
}
2006-03-30 15:15:30 +02:00
static void page_cache_pipe_buf_release ( struct pipe_inode_info * info ,
struct pipe_buffer * buf )
{
page_cache_release ( buf - > page ) ;
buf - > page = NULL ;
2006-04-02 23:11:04 +02:00
buf - > flags & = ~ ( PIPE_BUF_FLAG_STOLEN | PIPE_BUF_FLAG_LRU ) ;
2006-03-30 15:15:30 +02:00
}
static void * page_cache_pipe_buf_map ( struct file * file ,
struct pipe_inode_info * info ,
struct pipe_buffer * buf )
{
struct page * page = buf - > page ;
2006-04-10 09:04:41 +02:00
int err ;
2006-03-30 15:15:30 +02:00
if ( ! PageUptodate ( page ) ) {
2006-04-10 09:04:41 +02:00
lock_page ( page ) ;
/*
* Page got truncated / unhashed . This will cause a 0 - byte
2006-04-11 13:57:21 +02:00
* splice , if this is the first page .
2006-04-10 09:04:41 +02:00
*/
if ( ! page - > mapping ) {
err = - ENODATA ;
goto error ;
}
2006-03-30 15:15:30 +02:00
2006-04-10 09:04:41 +02:00
/*
2006-04-11 13:57:21 +02:00
* Uh oh , read - error from disk .
2006-04-10 09:04:41 +02:00
*/
if ( ! PageUptodate ( page ) ) {
err = - EIO ;
goto error ;
}
/*
2006-04-11 13:57:21 +02:00
* Page is ok afterall , fall through to mapping .
2006-04-10 09:04:41 +02:00
*/
2006-03-30 15:15:30 +02:00
unlock_page ( page ) ;
}
2006-04-10 09:04:41 +02:00
return kmap ( page ) ;
error :
unlock_page ( page ) ;
return ERR_PTR ( err ) ;
2006-03-30 15:15:30 +02:00
}
static void page_cache_pipe_buf_unmap ( struct pipe_inode_info * info ,
struct pipe_buffer * buf )
{
kunmap ( buf - > page ) ;
}
static struct pipe_buf_operations page_cache_pipe_buf_ops = {
. can_merge = 0 ,
. map = page_cache_pipe_buf_map ,
. unmap = page_cache_pipe_buf_unmap ,
. release = page_cache_pipe_buf_release ,
2006-03-30 15:16:46 +02:00
. steal = page_cache_pipe_buf_steal ,
2006-03-30 15:15:30 +02:00
} ;
2006-04-02 23:05:09 +02:00
/*
* Pipe output worker . This sets up our pipe format with the page cache
* pipe buffer operations . Otherwise very similar to the regular pipe_writev ( ) .
*/
2006-04-10 15:18:35 +02:00
static ssize_t move_to_pipe ( struct pipe_inode_info * pipe , struct page * * pages ,
2006-03-30 15:15:30 +02:00
int nr_pages , unsigned long offset ,
2006-04-02 12:46:35 -07:00
unsigned long len , unsigned int flags )
2006-03-30 15:15:30 +02:00
{
int ret , do_wakeup , i ;
ret = 0 ;
do_wakeup = 0 ;
i = 0 ;
2006-04-10 15:18:35 +02:00
if ( pipe - > inode )
mutex_lock ( & pipe - > inode - > i_mutex ) ;
2006-03-30 15:15:30 +02:00
for ( ; ; ) {
2006-04-10 15:18:35 +02:00
if ( ! pipe - > readers ) {
2006-03-30 15:15:30 +02:00
send_sig ( SIGPIPE , current , 0 ) ;
if ( ! ret )
ret = - EPIPE ;
break ;
}
2006-04-11 13:53:56 +02:00
if ( pipe - > nrbufs < PIPE_BUFFERS ) {
int newbuf = ( pipe - > curbuf + pipe - > nrbufs ) & ( PIPE_BUFFERS - 1 ) ;
2006-04-10 15:18:35 +02:00
struct pipe_buffer * buf = pipe - > bufs + newbuf ;
2006-03-30 15:15:30 +02:00
struct page * page = pages [ i + + ] ;
unsigned long this_len ;
this_len = PAGE_CACHE_SIZE - offset ;
if ( this_len > len )
this_len = len ;
buf - > page = page ;
buf - > offset = offset ;
buf - > len = this_len ;
buf - > ops = & page_cache_pipe_buf_ops ;
2006-04-11 13:53:56 +02:00
pipe - > nrbufs + + ;
if ( pipe - > inode )
do_wakeup = 1 ;
2006-03-30 15:15:30 +02:00
ret + = this_len ;
len - = this_len ;
offset = 0 ;
if ( ! - - nr_pages )
break ;
if ( ! len )
break ;
2006-04-11 13:53:56 +02:00
if ( pipe - > nrbufs < PIPE_BUFFERS )
2006-03-30 15:15:30 +02:00
continue ;
break ;
}
2006-04-02 12:46:35 -07:00
if ( flags & SPLICE_F_NONBLOCK ) {
if ( ! ret )
ret = - EAGAIN ;
break ;
}
2006-03-30 15:15:30 +02:00
if ( signal_pending ( current ) ) {
if ( ! ret )
ret = - ERESTARTSYS ;
break ;
}
if ( do_wakeup ) {
2006-04-10 09:03:32 +02:00
smp_mb ( ) ;
2006-04-10 15:18:35 +02:00
if ( waitqueue_active ( & pipe - > wait ) )
wake_up_interruptible_sync ( & pipe - > wait ) ;
kill_fasync ( & pipe - > fasync_readers , SIGIO , POLL_IN ) ;
2006-03-30 15:15:30 +02:00
do_wakeup = 0 ;
}
2006-04-10 15:18:35 +02:00
pipe - > waiting_writers + + ;
pipe_wait ( pipe ) ;
pipe - > waiting_writers - - ;
2006-03-30 15:15:30 +02:00
}
2006-04-10 15:18:35 +02:00
if ( pipe - > inode )
mutex_unlock ( & pipe - > inode - > i_mutex ) ;
2006-03-30 15:15:30 +02:00
if ( do_wakeup ) {
2006-04-10 09:03:32 +02:00
smp_mb ( ) ;
2006-04-10 15:18:35 +02:00
if ( waitqueue_active ( & pipe - > wait ) )
wake_up_interruptible ( & pipe - > wait ) ;
kill_fasync ( & pipe - > fasync_readers , SIGIO , POLL_IN ) ;
2006-03-30 15:15:30 +02:00
}
while ( i < nr_pages )
page_cache_release ( pages [ i + + ] ) ;
return ret ;
}
2006-04-10 15:18:35 +02:00
/*
 * Collect up to PIPE_BUFFERS page cache pages covering @len bytes from the
 * current position of @in and queue them into @pipe via move_to_pipe().
 *
 * Returns what move_to_pipe() returns if at least one page was gathered;
 * otherwise the error recorded while looking up / reading the first page
 * (0 if there was none).
 */
static int
__generic_file_splice_read(struct file *in, struct pipe_inode_info *pipe,
			   size_t len, unsigned int flags)
{
	struct address_space *mapping = in->f_mapping;
	struct page *pages[PIPE_BUFFERS];
	unsigned int offset, nr_pages;
	struct page *page;
	pgoff_t index;
	int error = 0;
	int i;

	index = in->f_pos >> PAGE_CACHE_SHIFT;
	offset = in->f_pos & ~PAGE_CACHE_MASK;
	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (nr_pages > PIPE_BUFFERS)
		nr_pages = PIPE_BUFFERS;

	/*
	 * Kick off read-ahead for the range, except for a single page at a
	 * non-zero offset: that is most likely a small-chunk splice and the
	 * page is already there.
	 */
	if (!offset || nr_pages > 1)
		do_page_cache_readahead(mapping, in, index, nr_pages);

	/*
	 * Walk the range and make sure every page is present and uptodate.
	 */
	for (i = 0; i < nr_pages; i++, index++) {
find_page:
		page = find_get_page(mapping, index);
		if (!page) {
			/*
			 * In nonblocking mode do not block on readpage;
			 * read-ahead was started above, so there will be
			 * asynchronous progress.
			 */
			if (flags & SPLICE_F_NONBLOCK)
				break;

			/* Not cached yet: allocate and insert a new page. */
			page = page_cache_alloc_cold(mapping);
			if (!page)
				break;

			error = add_to_page_cache_lru(page, mapping, index,
						mapping_gfp_mask(mapping));
			if (unlikely(error)) {
				page_cache_release(page);
				break;
			}

			goto readpage;
		}

		if (PageUptodate(page))
			goto fill_it;

		/* Not uptodate: we may have to start io on it. */
		lock_page(page);

		/*
		 * The page was truncated, stop here. If this is not the
		 * first page we simply complete what was gathered so far.
		 */
		if (!page->mapping) {
			unlock_page(page);
			page_cache_release(page);
			break;
		}

		/* It was under io and has completed in the meantime. */
		if (PageUptodate(page)) {
			unlock_page(page);
			goto fill_it;
		}

readpage:
		/* The page has to be read in. */
		error = mapping->a_ops->readpage(in, page);
		if (unlikely(error)) {
			page_cache_release(page);
			if (error == AOP_TRUNCATED_PAGE)
				goto find_page;
			break;
		}

fill_it:
		pages[i] = page;
	}

	if (i)
		return move_to_pipe(pipe, pages, i, offset, len, flags);

	return error;
}
2006-04-02 23:05:09 +02:00
/**
* generic_file_splice_read - splice data from file to a pipe
* @ in : file to splice from
* @ pipe : pipe to splice to
* @ len : number of bytes to splice
* @ flags : splice modifier flags
*
* Will read pages from given file and fill them into a pipe .
*/
2006-04-10 15:18:35 +02:00
ssize_t generic_file_splice_read ( struct file * in , struct pipe_inode_info * pipe ,
2006-03-30 15:15:30 +02:00
size_t len , unsigned int flags )
{
ssize_t spliced ;
int ret ;
ret = 0 ;
spliced = 0 ;
2006-04-10 15:18:35 +02:00
2006-03-30 15:15:30 +02:00
while ( len ) {
2006-04-02 12:46:35 -07:00
ret = __generic_file_splice_read ( in , pipe , len , flags ) ;
2006-03-30 15:15:30 +02:00
if ( ret < = 0 )
break ;
in - > f_pos + = ret ;
len - = ret ;
spliced + = ret ;
2006-04-02 12:46:35 -07:00
if ( ! ( flags & SPLICE_F_NONBLOCK ) )
continue ;
ret = - EAGAIN ;
break ;
2006-03-30 15:15:30 +02:00
}
if ( spliced )
return spliced ;
return ret ;
}
2006-04-02 23:06:05 +02:00
EXPORT_SYMBOL ( generic_file_splice_read ) ;
2006-03-30 15:15:30 +02:00
/*
2006-04-02 23:04:46 +02:00
* Send ' sd - > len ' bytes to socket from ' sd - > file ' at position ' sd - > pos '
* using sendpage ( ) .
2006-03-30 15:15:30 +02:00
*/
static int pipe_to_sendpage ( struct pipe_inode_info * info ,
struct pipe_buffer * buf , struct splice_desc * sd )
{
struct file * file = sd - > file ;
loff_t pos = sd - > pos ;
unsigned int offset ;
ssize_t ret ;
void * ptr ;
2006-04-02 23:05:41 +02:00
int more ;
2006-03-30 15:15:30 +02:00
/*
2006-04-11 13:57:21 +02:00
* Sub - optimal , but we are limited by the pipe - > map . We don ' t
2006-03-30 15:15:30 +02:00
* need a kmap ' ed buffer here , we just want to make sure we
* have the page pinned if the pipe page originates from the
2006-04-11 13:57:21 +02:00
* page cache .
2006-03-30 15:15:30 +02:00
*/
ptr = buf - > ops - > map ( file , info , buf ) ;
if ( IS_ERR ( ptr ) )
return PTR_ERR ( ptr ) ;
offset = pos & ~ PAGE_CACHE_MASK ;
2006-04-02 23:05:41 +02:00
more = ( sd - > flags & SPLICE_F_MORE ) | | sd - > len < sd - > total_len ;
2006-03-30 15:15:30 +02:00
2006-04-02 23:05:41 +02:00
ret = file - > f_op - > sendpage ( file , buf - > page , offset , sd - > len , & pos , more ) ;
2006-03-30 15:15:30 +02:00
buf - > ops - > unmap ( info , buf ) ;
if ( ret = = sd - > len )
return 0 ;
return - EIO ;
}
/*
* This is a little more tricky than the file - > pipe splicing . There are
* basically three cases :
*
* - Destination page already exists in the address space and there
* are users of it . For that case we have no other option that
* copying the data . Tough luck .
* - Destination page already exists in the address space , but there
* are no users of it . Make sure it ' s uptodate , then drop it . Fall
* through to last case .
* - Destination page does not exist , we can add the pipe page to
* the page cache and avoid the copy .
*
2006-04-02 23:05:09 +02:00
* If asked to move pages to the output file ( SPLICE_F_MOVE is set in
* sd - > flags ) , we attempt to migrate pages from the pipe to the output
* file address space page cache . This is possible if no one else has
* the pipe page referenced outside of the pipe and page cache . If
* SPLICE_F_MOVE isn ' t set , or we cannot move the page , we simply create
* a new page in the output file page cache and fill / dirty that .
2006-03-30 15:15:30 +02:00
*/
static int pipe_to_file ( struct pipe_inode_info * info , struct pipe_buffer * buf ,
struct splice_desc * sd )
{
struct file * file = sd - > file ;
struct address_space * mapping = file - > f_mapping ;
2006-04-02 23:11:04 +02:00
gfp_t gfp_mask = mapping_gfp_mask ( mapping ) ;
2006-03-30 15:15:30 +02:00
unsigned int offset ;
struct page * page ;
pgoff_t index ;
2006-03-30 15:16:46 +02:00
char * src ;
2006-04-02 23:11:04 +02:00
int ret ;
2006-03-30 15:15:30 +02:00
/*
2006-04-10 09:04:41 +02:00
* make sure the data in this buffer is uptodate
2006-03-30 15:15:30 +02:00
*/
src = buf - > ops - > map ( file , info , buf ) ;
if ( IS_ERR ( src ) )
return PTR_ERR ( src ) ;
index = sd - > pos > > PAGE_CACHE_SHIFT ;
offset = sd - > pos & ~ PAGE_CACHE_MASK ;
/*
2006-04-11 13:57:21 +02:00
* Reuse buf page , if SPLICE_F_MOVE is set .
2006-03-30 15:15:30 +02:00
*/
2006-03-30 15:16:46 +02:00
if ( sd - > flags & SPLICE_F_MOVE ) {
2006-04-02 23:05:09 +02:00
/*
* If steal succeeds , buf - > page is now pruned from the vm
* side ( LRU and page cache ) and we can reuse it .
*/
2006-03-30 15:16:46 +02:00
if ( buf - > ops - > steal ( info , buf ) )
goto find_page ;
2006-04-10 09:04:41 +02:00
/*
* this will also set the page locked
*/
2006-03-30 15:16:46 +02:00
page = buf - > page ;
2006-04-02 23:11:04 +02:00
if ( add_to_page_cache ( page , mapping , index , gfp_mask ) )
2006-03-30 15:16:46 +02:00
goto find_page ;
2006-04-02 23:11:04 +02:00
if ( ! ( buf - > flags & PIPE_BUF_FLAG_LRU ) )
lru_cache_add ( page ) ;
2006-03-30 15:16:46 +02:00
} else {
find_page :
ret = - ENOMEM ;
2006-04-02 23:11:04 +02:00
page = find_or_create_page ( mapping , index , gfp_mask ) ;
2006-03-30 15:16:46 +02:00
if ( ! page )
2006-04-10 09:02:40 +02:00
goto out_nomem ;
2006-03-30 15:16:46 +02:00
/*
* If the page is uptodate , it is also locked . If it isn ' t
* uptodate , we can mark it uptodate if we are filling the
* full page . Otherwise we need to read it in first . . .
*/
if ( ! PageUptodate ( page ) ) {
if ( sd - > len < PAGE_CACHE_SIZE ) {
ret = mapping - > a_ops - > readpage ( file , page ) ;
if ( unlikely ( ret ) )
goto out ;
lock_page ( page ) ;
if ( ! PageUptodate ( page ) ) {
/*
2006-04-11 13:57:21 +02:00
* Page got invalidated , repeat .
2006-03-30 15:16:46 +02:00
*/
if ( ! page - > mapping ) {
unlock_page ( page ) ;
page_cache_release ( page ) ;
goto find_page ;
}
ret = - EIO ;
goto out ;
2006-03-30 15:15:30 +02:00
}
2006-03-30 15:16:46 +02:00
} else {
WARN_ON ( ! PageLocked ( page ) ) ;
SetPageUptodate ( page ) ;
2006-03-30 15:15:30 +02:00
}
}
}
ret = mapping - > a_ops - > prepare_write ( file , page , 0 , sd - > len ) ;
2006-04-02 23:04:46 +02:00
if ( ret = = AOP_TRUNCATED_PAGE ) {
page_cache_release ( page ) ;
goto find_page ;
} else if ( ret )
2006-03-30 15:15:30 +02:00
goto out ;
2006-04-02 23:11:04 +02:00
if ( ! ( buf - > flags & PIPE_BUF_FLAG_STOLEN ) ) {
2006-03-30 15:16:46 +02:00
char * dst = kmap_atomic ( page , KM_USER0 ) ;
memcpy ( dst + offset , src + buf - > offset , sd - > len ) ;
flush_dcache_page ( page ) ;
kunmap_atomic ( dst , KM_USER0 ) ;
}
2006-03-30 15:15:30 +02:00
ret = mapping - > a_ops - > commit_write ( file , page , 0 , sd - > len ) ;
2006-04-02 23:04:46 +02:00
if ( ret = = AOP_TRUNCATED_PAGE ) {
page_cache_release ( page ) ;
goto find_page ;
} else if ( ret )
2006-03-30 15:15:30 +02:00
goto out ;
2006-04-10 09:01:01 +02:00
mark_page_accessed ( page ) ;
2006-04-02 23:04:46 +02:00
balance_dirty_pages_ratelimited ( mapping ) ;
2006-03-30 15:15:30 +02:00
out :
2006-04-02 23:11:04 +02:00
if ( ! ( buf - > flags & PIPE_BUF_FLAG_STOLEN ) ) {
2006-03-30 15:16:46 +02:00
page_cache_release ( page ) ;
2006-04-02 23:04:46 +02:00
unlock_page ( page ) ;
}
2006-04-10 09:02:40 +02:00
out_nomem :
2006-03-30 15:15:30 +02:00
buf - > ops - > unmap ( info , buf ) ;
return ret ;
}
/*
 * Actor callback: moves the chunk described by 'sd' out of pipe buffer
 * 'buf' and returns 0 on success or a negative errno.
 */
typedef int splice_actor(struct pipe_inode_info *pipe,
			 struct pipe_buffer *buf,
			 struct splice_desc *sd);
2006-04-02 23:05:09 +02:00
/*
* Pipe input worker . Most of this logic works like a regular pipe , the
* key here is the ' actor ' worker passed in that actually moves the data
* to the wanted destination . See pipe_to_file / pipe_to_sendpage above .
*/
2006-04-10 15:18:35 +02:00
static ssize_t move_from_pipe ( struct pipe_inode_info * pipe , struct file * out ,
2006-03-30 15:15:30 +02:00
size_t len , unsigned int flags ,
splice_actor * actor )
{
int ret , do_wakeup , err ;
struct splice_desc sd ;
ret = 0 ;
do_wakeup = 0 ;
sd . total_len = len ;
sd . flags = flags ;
sd . file = out ;
sd . pos = out - > f_pos ;
2006-04-10 15:18:35 +02:00
if ( pipe - > inode )
mutex_lock ( & pipe - > inode - > i_mutex ) ;
2006-03-30 15:15:30 +02:00
for ( ; ; ) {
2006-04-11 13:53:56 +02:00
if ( pipe - > nrbufs ) {
struct pipe_buffer * buf = pipe - > bufs + pipe - > curbuf ;
2006-03-30 15:15:30 +02:00
struct pipe_buf_operations * ops = buf - > ops ;
sd . len = buf - > len ;
if ( sd . len > sd . total_len )
sd . len = sd . total_len ;
2006-04-10 15:18:35 +02:00
err = actor ( pipe , buf , & sd ) ;
2006-03-30 15:15:30 +02:00
if ( err ) {
if ( ! ret & & err ! = - ENODATA )
ret = err ;
break ;
}
ret + = sd . len ;
buf - > offset + = sd . len ;
buf - > len - = sd . len ;
2006-04-11 13:57:21 +02:00
2006-03-30 15:15:30 +02:00
if ( ! buf - > len ) {
buf - > ops = NULL ;
2006-04-10 15:18:35 +02:00
ops - > release ( pipe , buf ) ;
2006-04-11 13:53:56 +02:00
pipe - > curbuf = ( pipe - > curbuf + 1 ) & ( PIPE_BUFFERS - 1 ) ;
pipe - > nrbufs - - ;
if ( pipe - > inode )
do_wakeup = 1 ;
2006-03-30 15:15:30 +02:00
}
sd . pos + = sd . len ;
sd . total_len - = sd . len ;
if ( ! sd . total_len )
break ;
}
2006-04-11 13:53:56 +02:00
if ( pipe - > nrbufs )
2006-03-30 15:15:30 +02:00
continue ;
2006-04-10 15:18:35 +02:00
if ( ! pipe - > writers )
2006-03-30 15:15:30 +02:00
break ;
2006-04-10 15:18:35 +02:00
if ( ! pipe - > waiting_writers ) {
2006-03-30 15:15:30 +02:00
if ( ret )
break ;
}
2006-04-02 12:46:35 -07:00
if ( flags & SPLICE_F_NONBLOCK ) {
if ( ! ret )
ret = - EAGAIN ;
break ;
}
2006-03-30 15:15:30 +02:00
if ( signal_pending ( current ) ) {
if ( ! ret )
ret = - ERESTARTSYS ;
break ;
}
if ( do_wakeup ) {
2006-04-10 09:03:32 +02:00
smp_mb ( ) ;
2006-04-10 15:18:35 +02:00
if ( waitqueue_active ( & pipe - > wait ) )
wake_up_interruptible_sync ( & pipe - > wait ) ;
kill_fasync ( & pipe - > fasync_writers , SIGIO , POLL_OUT ) ;
2006-03-30 15:15:30 +02:00
do_wakeup = 0 ;
}
2006-04-10 15:18:35 +02:00
pipe_wait ( pipe ) ;
2006-03-30 15:15:30 +02:00
}
2006-04-10 15:18:35 +02:00
if ( pipe - > inode )
mutex_unlock ( & pipe - > inode - > i_mutex ) ;
2006-03-30 15:15:30 +02:00
if ( do_wakeup ) {
2006-04-10 09:03:32 +02:00
smp_mb ( ) ;
2006-04-10 15:18:35 +02:00
if ( waitqueue_active ( & pipe - > wait ) )
wake_up_interruptible ( & pipe - > wait ) ;
kill_fasync ( & pipe - > fasync_writers , SIGIO , POLL_OUT ) ;
2006-03-30 15:15:30 +02:00
}
out - > f_pos = sd . pos ;
return ret ;
}
2006-04-02 23:05:09 +02:00
/**
* generic_file_splice_write - splice data from a pipe to a file
2006-04-10 15:18:35 +02:00
* @ pipe : pipe info
2006-04-02 23:05:09 +02:00
* @ out : file to write to
* @ len : number of bytes to splice
* @ flags : splice modifier flags
*
* Will either move or copy pages ( determined by @ flags options ) from
* the given pipe inode to the given file .
*
*/
2006-04-10 15:18:35 +02:00
ssize_t
generic_file_splice_write ( struct pipe_inode_info * pipe , struct file * out ,
size_t len , unsigned int flags )
2006-03-30 15:15:30 +02:00
{
2006-04-02 23:04:46 +02:00
struct address_space * mapping = out - > f_mapping ;
2006-04-10 15:18:35 +02:00
ssize_t ret ;
ret = move_from_pipe ( pipe , out , len , flags , pipe_to_file ) ;
2006-04-02 23:04:46 +02:00
/*
2006-04-11 13:57:21 +02:00
* If file or inode is SYNC and we actually wrote some data , sync it .
2006-04-02 23:04:46 +02:00
*/
if ( unlikely ( ( out - > f_flags & O_SYNC ) | | IS_SYNC ( mapping - > host ) )
& & ret > 0 ) {
struct inode * inode = mapping - > host ;
int err ;
mutex_lock ( & inode - > i_mutex ) ;
err = generic_osync_inode ( mapping - > host , mapping ,
2006-04-11 13:56:09 +02:00
OSYNC_METADATA | OSYNC_DATA ) ;
2006-04-02 23:04:46 +02:00
mutex_unlock ( & inode - > i_mutex ) ;
if ( err )
ret = err ;
}
return ret ;
2006-03-30 15:15:30 +02:00
}
2006-04-02 23:06:05 +02:00
EXPORT_SYMBOL ( generic_file_splice_write ) ;
2006-04-02 23:05:09 +02:00
/**
* generic_splice_sendpage - splice data from a pipe to a socket
* @ inode : pipe inode
* @ out : socket to write to
* @ len : number of bytes to splice
* @ flags : splice modifier flags
*
* Will send @ len bytes from the pipe to a network socket . No data copying
* is involved .
*
*/
2006-04-10 15:18:35 +02:00
ssize_t generic_splice_sendpage ( struct pipe_inode_info * pipe , struct file * out ,
2006-03-30 15:15:30 +02:00
size_t len , unsigned int flags )
{
2006-04-10 15:18:35 +02:00
return move_from_pipe ( pipe , out , len , flags , pipe_to_sendpage ) ;
2006-03-30 15:15:30 +02:00
}
2006-04-02 23:06:05 +02:00
EXPORT_SYMBOL ( generic_splice_sendpage ) ;
2006-03-30 23:06:13 -05:00
2006-04-02 23:05:09 +02:00
/*
* Attempt to initiate a splice from pipe to file .
*/
2006-04-10 15:18:35 +02:00
static long do_splice_from ( struct pipe_inode_info * pipe , struct file * out ,
2006-04-11 13:52:07 +02:00
size_t len , unsigned int flags )
2006-03-30 15:15:30 +02:00
{
loff_t pos ;
int ret ;
2006-04-11 13:56:09 +02:00
if ( unlikely ( ! out - > f_op | | ! out - > f_op - > splice_write ) )
2006-03-30 15:15:30 +02:00
return - EINVAL ;
2006-04-11 13:56:09 +02:00
if ( unlikely ( ! ( out - > f_mode & FMODE_WRITE ) ) )
2006-03-30 15:15:30 +02:00
return - EBADF ;
pos = out - > f_pos ;
2006-04-10 15:18:58 +02:00
2006-03-30 15:15:30 +02:00
ret = rw_verify_area ( WRITE , out , & pos , len ) ;
if ( unlikely ( ret < 0 ) )
return ret ;
return out - > f_op - > splice_write ( pipe , out , len , flags ) ;
}
2006-04-02 23:05:09 +02:00
/*
* Attempt to initiate a splice from a file to a pipe .
*/
2006-04-11 13:52:07 +02:00
static long do_splice_to ( struct file * in , struct pipe_inode_info * pipe ,
size_t len , unsigned int flags )
2006-03-30 15:15:30 +02:00
{
loff_t pos , isize , left ;
int ret ;
2006-04-11 13:56:09 +02:00
if ( unlikely ( ! in - > f_op | | ! in - > f_op - > splice_read ) )
2006-03-30 15:15:30 +02:00
return - EINVAL ;
2006-04-11 13:56:09 +02:00
if ( unlikely ( ! ( in - > f_mode & FMODE_READ ) ) )
2006-03-30 15:15:30 +02:00
return - EBADF ;
pos = in - > f_pos ;
2006-04-10 15:18:58 +02:00
2006-03-30 15:15:30 +02:00
ret = rw_verify_area ( READ , in , & pos , len ) ;
if ( unlikely ( ret < 0 ) )
return ret ;
isize = i_size_read ( in - > f_mapping - > host ) ;
if ( unlikely ( in - > f_pos > = isize ) )
return 0 ;
left = isize - in - > f_pos ;
2006-04-11 13:56:09 +02:00
if ( unlikely ( left < len ) )
2006-03-30 15:15:30 +02:00
len = left ;
return in - > f_op - > splice_read ( in , pipe , len , flags ) ;
}
2006-04-11 13:52:07 +02:00
/*
 * Splice up to @len bytes directly from @in to @out, using a per-task
 * internal pipe (current->splice_pipe) as the intermediate buffer.
 *
 * Returns the number of bytes transferred if any; otherwise 0 or the
 * negative errno of the failing step. -EINVAL is returned if @in is
 * neither a regular file nor a block device, -ENOMEM if the internal pipe
 * cannot be allocated.
 */
long do_splice_direct(struct file *in, struct file *out, size_t len,
		      unsigned int flags)
{
	struct pipe_inode_info *pipe;
	long ret, bytes;
	umode_t i_mode;
	int i;

	/*
	 * We require the input being a regular file or a block device, as
	 * we don't want to randomly drop data for eg socket -> socket
	 * splicing. Use the piped splicing for that!
	 */
	i_mode = in->f_dentry->d_inode->i_mode;
	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
		return -EINVAL;

	/*
	 * neither in nor out is a pipe, setup an internal pipe attached to
	 * 'out' and transfer the wanted data from 'in' to 'out' through that
	 */
	pipe = current->splice_pipe;
	if (unlikely(!pipe)) {
		pipe = alloc_pipe_info(NULL);
		if (!pipe)
			return -ENOMEM;

		/*
		 * We don't have an immediate reader, but we'll read the stuff
		 * out of the pipe right after the move_to_pipe(). So set
		 * PIPE_READERS appropriately.
		 */
		pipe->readers = 1;

		current->splice_pipe = pipe;
	}

	/*
	 * Do the splice.
	 */
	ret = 0;
	bytes = 0;

	while (len) {
		size_t read_len, max_read_len;

		/*
		 * Do at most PIPE_BUFFERS pages worth of transfer:
		 */
		max_read_len = min(len, (size_t)(PIPE_BUFFERS * PAGE_SIZE));

		ret = do_splice_to(in, pipe, max_read_len, flags);
		if (unlikely(ret < 0))
			goto out_release;

		/*
		 * Nothing was read (e.g. end of file). 'len' would never
		 * shrink, so stop here instead of looping forever.
		 */
		if (!ret)
			break;

		read_len = ret;

		/*
		 * NOTE: nonblocking mode only applies to the input. We
		 * must not do the output in nonblocking mode as then we
		 * could get stuck data in the internal pipe:
		 */
		ret = do_splice_from(pipe, out, read_len,
				     flags & ~SPLICE_F_NONBLOCK);
		if (unlikely(ret < 0))
			goto out_release;

		bytes += ret;
		len -= ret;

		/*
		 * Short write: data is still sitting in the internal pipe.
		 * Stop and release it below rather than resetting the pipe
		 * without dropping the buffers.
		 */
		if (unlikely((size_t)ret < read_len))
			goto out_release;

		/*
		 * In nonblocking mode, if we got back a short read then
		 * that was due to either an IO error or due to the
		 * pagecache entry not being there. In the IO error case
		 * the _next_ splice attempt will produce a clean IO error
		 * return value (not a short read), so in both cases it's
		 * correct to break out of the loop here:
		 */
		if ((flags & SPLICE_F_NONBLOCK) && (read_len < max_read_len))
			break;
	}

	pipe->nrbufs = pipe->curbuf = 0;

	return bytes;

out_release:
	/*
	 * If we did an incomplete transfer we must release
	 * the pipe buffers in question:
	 */
	for (i = 0; i < PIPE_BUFFERS; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;

		if (buf->ops) {
			buf->ops->release(pipe, buf);
			buf->ops = NULL;
		}
	}
	pipe->nrbufs = pipe->curbuf = 0;

	/*
	 * If we transferred some data, return the number of bytes:
	 */
	if (bytes > 0)
		return bytes;

	return ret;
}

EXPORT_SYMBOL(do_splice_direct);
2006-04-02 23:05:09 +02:00
/*
* Determine where to splice to / from .
*/
2006-04-10 15:18:58 +02:00
static long do_splice ( struct file * in , loff_t __user * off_in ,
struct file * out , loff_t __user * off_out ,
size_t len , unsigned int flags )
2006-03-30 15:15:30 +02:00
{
2006-04-10 15:18:35 +02:00
struct pipe_inode_info * pipe ;
2006-03-30 15:15:30 +02:00
2006-04-10 15:18:35 +02:00
pipe = in - > f_dentry - > d_inode - > i_pipe ;
2006-04-10 15:18:58 +02:00
if ( pipe ) {
if ( off_in )
return - ESPIPE ;
2006-04-11 13:52:07 +02:00
if ( off_out ) {
if ( out - > f_op - > llseek = = no_llseek )
return - EINVAL ;
if ( copy_from_user ( & out - > f_pos , off_out ,
sizeof ( loff_t ) ) )
return - EFAULT ;
}
2006-04-10 15:18:58 +02:00
2006-04-11 13:52:07 +02:00
return do_splice_from ( pipe , out , len , flags ) ;
2006-04-10 15:18:58 +02:00
}
2006-03-30 15:15:30 +02:00
2006-04-10 15:18:35 +02:00
pipe = out - > f_dentry - > d_inode - > i_pipe ;
2006-04-10 15:18:58 +02:00
if ( pipe ) {
if ( off_out )
return - ESPIPE ;
2006-04-11 13:52:07 +02:00
if ( off_in ) {
if ( in - > f_op - > llseek = = no_llseek )
return - EINVAL ;
if ( copy_from_user ( & in - > f_pos , off_in , sizeof ( loff_t ) ) )
return - EFAULT ;
}
2006-04-10 15:18:58 +02:00
2006-04-11 13:52:07 +02:00
return do_splice_to ( in , pipe , len , flags ) ;
2006-04-10 15:18:58 +02:00
}
2006-03-30 15:15:30 +02:00
return - EINVAL ;
}
2006-04-10 15:18:58 +02:00
/*
 * splice() system call entry point.
 *
 * Returns 0 for a zero-length request, -EBADF if either descriptor is
 * invalid or not opened in the required mode (@fd_in readable, @fd_out
 * writable), and otherwise the result of do_splice().
 */
asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
			   int fd_out, loff_t __user *off_out,
			   size_t len, unsigned int flags)
{
	struct file *in, *out;
	int fput_in, fput_out;
	long error = -EBADF;

	if (unlikely(!len))
		return 0;

	in = fget_light(fd_in, &fput_in);
	if (!in)
		return error;

	if (!(in->f_mode & FMODE_READ))
		goto out_put_in;

	out = fget_light(fd_out, &fput_out);
	if (!out)
		goto out_put_in;

	if (out->f_mode & FMODE_WRITE)
		error = do_splice(in, off_in, out, off_out, len, flags);

	fput_light(out, fput_out);
out_put_in:
	fput_light(in, fput_in);
	return error;
}