/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "extent_io.h"

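/*
 * the end of an ordered extent is file_offset + len; cap it at the
 * largest u64 if that addition overflows
 */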
static u64 entry_end(struct btrfs_ordered_extent *entry)
{
	if (entry->file_offset + entry->len < entry->file_offset)
		return (u64)-1;
	return entry->file_offset + entry->len;
}

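/*
 * insert a new ordered extent node into the tree, keyed by file_offset.
 * If file_offset already falls inside an existing entry, nothing is
 * inserted and the overlapping node is returned instead of NULL.
 */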
static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_ordered_extent *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct btrfs_ordered_extent, rb_node);

		if (file_offset < entry->file_offset)
			p = &(*p)->rb_left;
		else if (file_offset >= entry_end(entry))
			p = &(*p)->rb_right;
		else
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

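/*
 * search the tree for an ordered extent that contains file_offset.  If
 * none does, *prev_ret is set to the closest entry we passed while
 * walking, normally the last extent ending at or before file_offset.
 */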
static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct rb_node *test;
	struct btrfs_ordered_extent *entry;
	struct btrfs_ordered_extent *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct btrfs_ordered_extent, rb_node);
		prev = n;
		prev_entry = entry;

		if (file_offset < entry->file_offset)
			n = n->rb_left;
		else if (file_offset >= entry_end(entry))
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;

	while (prev && file_offset >= entry_end(prev_entry)) {
		test = rb_next(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		if (file_offset < entry_end(prev_entry))
			break;
		prev = test;
	}
	if (prev)
		prev_entry = rb_entry(prev, struct btrfs_ordered_extent,
				      rb_node);
	while (prev && file_offset < entry_end(prev_entry)) {
		test = rb_prev(prev);
		if (!test)
			break;
		prev_entry = rb_entry(test, struct btrfs_ordered_extent,
				      rb_node);
		prev = test;
	}
	*prev_ret = prev;
	return NULL;
}

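/* helper to check if a given file offset falls inside an ordered extent */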
static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
{
	if (file_offset < entry->file_offset ||
	    entry->file_offset + entry->len <= file_offset)
		return 0;
	return 1;
}

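/*
 * find the ordered extent covering file_offset in the per-inode tree.
 * tree->last caches the most recently returned node so repeated searches
 * near the same offset are cheap; when nothing covers the offset, the
 * nearest neighbouring entry is returned instead.
 */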
static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
					  u64 file_offset)
{
	struct rb_root *root = &tree->tree;
	struct rb_node *prev;
	struct rb_node *ret;
	struct btrfs_ordered_extent *entry;

	if (tree->last) {
		entry = rb_entry(tree->last, struct btrfs_ordered_extent,
				 rb_node);
		if (offset_in_entry(entry, file_offset))
			return tree->last;
	}
	ret = __tree_search(root, file_offset, &prev);
	if (!ret)
		ret = prev;
	if (ret)
		tree->last = ret;
	return ret;
}

/*
 * allocate and add a new ordered_extent into the per-inode tree.
 * file_offset is the logical offset in the file
 *
 * start is the disk block number of an extent already reserved in the
 * extent allocation tree
 *
 * len is the length of the extent
 *
 * This also sets the EXTENT_ORDERED bit on the range in the inode.
 *
 * The tree is given a single reference on the ordered extent that was
 * inserted.
 */
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
			     u64 start, u64 len)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry;

	tree = &BTRFS_I(inode)->ordered_tree;
	entry = kzalloc(sizeof(*entry), GFP_NOFS);
	if (!entry)
		return -ENOMEM;

	mutex_lock(&tree->mutex);
	entry->file_offset = file_offset;
	entry->start = start;
	entry->len = len;
	/* one ref for the tree */
	atomic_set(&entry->refs, 1);
	init_waitqueue_head(&entry->wait);
	INIT_LIST_HEAD(&entry->list);

	node = tree_insert(&tree->tree, file_offset,
			   &entry->rb_node);
	if (node) {
		entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		atomic_inc(&entry->refs);
	}
	set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
			   entry_end(entry) - 1, GFP_NOFS);

	mutex_unlock(&tree->mutex);
	BUG_ON(node);
	return 0;
}

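/*
 * A rough sketch of the intended lifecycle, pieced together from the
 * helpers in this file.  The real callers live elsewhere (the inode IO
 * completion paths) and may differ in detail:
 *
 *	btrfs_add_ordered_extent(inode, file_offset, start, len);
 *	...data IO for the range is submitted and completes...
 *	if (btrfs_dec_test_ordered_pending(inode, file_offset, len)) {
 *		ordered = btrfs_lookup_ordered_extent(inode, file_offset);
 *		...insert file extent items and checksums into the btree...
 *		btrfs_remove_ordered_extent(inode, ordered);
 *		btrfs_put_ordered_extent(ordered);	(drops the lookup ref)
 *		btrfs_put_ordered_extent(ordered);	(drops the ref the tree held)
 *	}
 */
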
/*
 * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
 * when an ordered extent is finished.
 */
int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry;

	tree = &BTRFS_I(inode)->ordered_tree;
	mutex_lock(&tree->mutex);
	node = tree_search(tree, sum->file_offset);
	BUG_ON(!node);

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	BUG_ON(!offset_in_entry(entry, sum->file_offset));

	list_add_tail(&sum->list, &entry->list);
	mutex_unlock(&tree->mutex);
	return 0;
}

/*
 * this is used to account for finished IO across a given range
 * of the file.  The IO should not span ordered extents.  If
 * a given ordered_extent is completely done, 1 is returned, otherwise
 * 0.
 *
 * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
 * to make sure this function only returns 1 once for a given ordered extent.
 */
int btrfs_dec_test_ordered_pending(struct inode *inode,
				   u64 file_offset, u64 io_size)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	int ret;

	tree = &BTRFS_I(inode)->ordered_tree;
	mutex_lock(&tree->mutex);
	clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
			     GFP_NOFS);
	node = tree_search(tree, file_offset);
	if (!node) {
		ret = 1;
		goto out;
	}

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	if (!offset_in_entry(entry, file_offset)) {
		ret = 1;
		goto out;
	}

	ret = test_range_bit(io_tree, entry->file_offset,
			     entry->file_offset + entry->len - 1,
			     EXTENT_ORDERED, 0);
	if (ret == 0)
		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
out:
	mutex_unlock(&tree->mutex);
	return ret == 0;
}

/*
 * used to drop a reference on an ordered extent.  This will free
 * the extent if the last reference is dropped
 */
int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{
	struct list_head *cur;
	struct btrfs_ordered_sum *sum;

	if (atomic_dec_and_test(&entry->refs)) {
		while (!list_empty(&entry->list)) {
			cur = entry->list.next;
			sum = list_entry(cur, struct btrfs_ordered_sum, list);
			list_del(&sum->list);
			kfree(sum);
		}
		kfree(entry);
	}
	return 0;
}

/*
 * remove an ordered extent from the tree.  No references are dropped,
 * but anyone waiting on this extent is woken up.
 */
int btrfs_remove_ordered_extent(struct inode *inode,
				struct btrfs_ordered_extent *entry)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;

	tree = &BTRFS_I(inode)->ordered_tree;
	mutex_lock(&tree->mutex);
	node = &entry->rb_node;
	rb_erase(node, &tree->tree);
	tree->last = NULL;
	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
	mutex_unlock(&tree->mutex);
	wake_up(&entry->wait);
	return 0;
}

/*
 * Used to start IO or wait for a given ordered extent to finish.
 *
 * If wait is one, this effectively waits on page writeback for all the pages
 * in the extent, and it waits on the io completion code to insert
 * metadata into the btree corresponding to the extent
 */
void btrfs_start_ordered_extent(struct inode *inode,
				struct btrfs_ordered_extent *entry,
				int wait)
{
	u64 start = entry->file_offset;
	u64 end = start + entry->len - 1;

	/*
	 * pages in the range can be dirty, clean or writeback.  We
	 * start IO on any dirty ones so the wait doesn't stall waiting
	 * for pdflush to find them
	 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
	do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
#else
	do_sync_mapping_range(inode->i_mapping, start, end,
			      SYNC_FILE_RANGE_WRITE);
#endif
	if (wait)
		wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
						 &entry->flags));
}

/*
 * Used to wait on ordered extents across a large range of bytes.
 */
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{
	u64 end;
	struct btrfs_ordered_extent *ordered;
	int found;
	int should_wait = 0;

again:
	if (start + len < start)
		end = (u64)-1;
	else
		end = start + len - 1;
	found = 0;
	while (1) {
		ordered = btrfs_lookup_first_ordered_extent(inode, end);
		if (!ordered) {
			break;
		}
		if (ordered->file_offset >= start + len) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		if (ordered->file_offset + ordered->len < start) {
			btrfs_put_ordered_extent(ordered);
			break;
		}
		btrfs_start_ordered_extent(inode, ordered, should_wait);
		found++;
		end = ordered->file_offset;
		btrfs_put_ordered_extent(ordered);
		if (end == 0)
			break;
		end--;
	}
	/* the first pass only starts IO, the second pass waits for it */
	if (!should_wait && found) {
		should_wait = 1;
		goto again;
	}
}

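/*
 * Hypothetical example of a caller: an fsync-style path that wants every
 * ordered extent touching the file flushed and complete could use
 *
 *	btrfs_wait_ordered_range(inode, 0, (u64)-1);
 *
 * which starts writeback on everything it finds in the first pass and
 * waits for BTRFS_ORDERED_COMPLETE on each extent in the second.
 */
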
/*
 * find an ordered extent corresponding to file_offset.  return NULL if
 * nothing is found, otherwise take a reference on the extent and return it
 */
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
							 u64 file_offset)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	tree = &BTRFS_I(inode)->ordered_tree;
	mutex_lock(&tree->mutex);
	node = tree_search(tree, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	if (!offset_in_entry(entry, file_offset))
		entry = NULL;
	if (entry)
		atomic_inc(&entry->refs);
out:
	mutex_unlock(&tree->mutex);
	return entry;
}

/*
 * lookup and return any extent before 'file_offset'.  NULL is returned
 * if none is found
 */
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
{
	struct btrfs_ordered_inode_tree *tree;
	struct rb_node *node;
	struct btrfs_ordered_extent *entry = NULL;

	tree = &BTRFS_I(inode)->ordered_tree;
	mutex_lock(&tree->mutex);
	node = tree_search(tree, file_offset);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
	atomic_inc(&entry->refs);
out:
	mutex_unlock(&tree->mutex);
	return entry;
}

/*
 * After an extent is done, call this to conditionally update the on disk
 * i_size.  i_size is updated to cover any fully written part of the file.
 */
int btrfs_ordered_update_i_size(struct inode *inode,
				struct btrfs_ordered_extent *ordered)
{
	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	u64 disk_i_size;
	u64 new_i_size;
	u64 i_size_test;
	struct rb_node *node;
	struct btrfs_ordered_extent *test;

	mutex_lock(&tree->mutex);
	disk_i_size = BTRFS_I(inode)->disk_i_size;

	/*
	 * if the disk i_size is already at the inode->i_size, or
	 * this ordered extent is inside the disk i_size, we're done
	 */
	if (disk_i_size >= inode->i_size ||
	    ordered->file_offset + ordered->len <= disk_i_size) {
		goto out;
	}

	/*
	 * we can't update the disk_i_size if there are delalloc bytes
	 * between disk_i_size and this ordered extent
	 */
	if (test_range_bit(io_tree, disk_i_size,
			   ordered->file_offset + ordered->len - 1,
			   EXTENT_DELALLOC, 0)) {
		goto out;
	}

	/*
	 * walk backward from this ordered extent to disk_i_size.
	 * if we find an ordered extent then we can't update disk i_size
	 * yet
	 */
	node = &ordered->rb_node;
	while (1) {
		node = rb_prev(node);
		if (!node)
			break;
		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		if (test->file_offset + test->len <= disk_i_size)
			break;
		if (test->file_offset >= inode->i_size)
			break;
		if (test->file_offset >= disk_i_size)
			goto out;
	}
	new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));

	/*
	 * at this point, we know we can safely update i_size to at least
	 * the offset from this ordered extent.  But, we need to
	 * walk forward and see if ios from higher up in the file have
	 * finished.
	 */
	node = rb_next(&ordered->rb_node);
	i_size_test = 0;
	if (node) {
		/*
		 * do we have an area where IO might have finished
		 * between our ordered extent and the next one.
		 */
		test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
		if (test->file_offset > entry_end(ordered)) {
			i_size_test = test->file_offset - 1;
		}
	} else {
		i_size_test = i_size_read(inode);
	}

	/*
	 * i_size_test is the end of a region after this ordered
	 * extent where there are no ordered extents.  As long as there
	 * are no delalloc bytes in this area, it is safe to update
	 * disk_i_size to the end of the region.
	 */
	if (i_size_test > entry_end(ordered) &&
	    !test_range_bit(io_tree, entry_end(ordered), i_size_test,
			    EXTENT_DELALLOC, 0)) {
		new_i_size = min_t(u64, i_size_test, i_size_read(inode));
	}
	BTRFS_I(inode)->disk_i_size = new_i_size;
out:
	mutex_unlock(&tree->mutex);
	return 0;
}

/*
 * search the ordered extents for one corresponding to 'offset' and
 * try to find a checksum.  This is used because we allow pages to
 * be reclaimed before their checksum is actually put into the btree
 */
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
{
	struct btrfs_ordered_sum *ordered_sum;
	struct btrfs_sector_sum *sector_sums;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
	struct list_head *cur;
	int ret = 1;
	int index;

	ordered = btrfs_lookup_ordered_extent(inode, offset);
	if (!ordered)
		return 1;

	mutex_lock(&tree->mutex);
	list_for_each_prev(cur, &ordered->list) {
		ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
		if (offset >= ordered_sum->file_offset &&
		    offset < ordered_sum->file_offset + ordered_sum->len) {
			index = (offset - ordered_sum->file_offset) /
				BTRFS_I(inode)->root->sectorsize;
			sector_sums = &ordered_sum->sums;
			*sum = sector_sums[index].sum;
			ret = 0;
			goto out;
		}
	}
out:
	mutex_unlock(&tree->mutex);
	return ret;
}