#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/mm.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/page-flags.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include "extent_map.h"

static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;

struct tree_entry {
	u64 start;
	u64 end;
	int in_tree;
	struct rb_node rb_node;
};

/* bits for the extent state */
#define EXTENT_DIRTY 1
#define EXTENT_WRITEBACK (1 << 1)
#define EXTENT_UPTODATE (1 << 2)
#define EXTENT_LOCKED (1 << 3)
#define EXTENT_NEW (1 << 4)
#define EXTENT_DELALLOC (1 << 5)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)

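/*
 * A state record carries any combination of the bits above.  For
 * example, set_extent_delalloc() below marks a range with
 * EXTENT_DELALLOC | EXTENT_DIRTY, and merge_state() only coalesces
 * neighbors whose state masks match exactly.
 */
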
void __init extent_map_init(void)
{
	extent_map_cache = kmem_cache_create("extent_map",
					     sizeof(struct extent_map), 0,
					     SLAB_RECLAIM_ACCOUNT |
					     SLAB_DESTROY_BY_RCU,
					     NULL);
	extent_state_cache = kmem_cache_create("extent_state",
					       sizeof(struct extent_state), 0,
					       SLAB_RECLAIM_ACCOUNT |
					       SLAB_DESTROY_BY_RCU,
					       NULL);
}

void __exit extent_map_exit(void)
{
	if (extent_map_cache)
		kmem_cache_destroy(extent_map_cache);
	if (extent_state_cache)
		kmem_cache_destroy(extent_state_cache);
}

void extent_map_tree_init(struct extent_map_tree *tree,
			  struct address_space *mapping, gfp_t mask)
{
	tree->map.rb_node = NULL;
	tree->state.rb_node = NULL;
	tree->ops = NULL;
	rwlock_init(&tree->lock);
	tree->mapping = mapping;
}
EXPORT_SYMBOL(extent_map_tree_init);

struct extent_map *alloc_extent_map(gfp_t mask)
{
	struct extent_map *em;

	em = kmem_cache_alloc(extent_map_cache, mask);
	if (!em || IS_ERR(em))
		return em;
	em->in_tree = 0;
	atomic_set(&em->refs, 1);
	return em;
}
EXPORT_SYMBOL(alloc_extent_map);

void free_extent_map(struct extent_map *em)
{
	if (atomic_dec_and_test(&em->refs)) {
		WARN_ON(em->in_tree);
		kmem_cache_free(extent_map_cache, em);
	}
}
EXPORT_SYMBOL(free_extent_map);

struct extent_state *alloc_extent_state(gfp_t mask)
{
	struct extent_state *state;

	state = kmem_cache_alloc(extent_state_cache, mask);
	if (!state || IS_ERR(state))
		return state;
	state->state = 0;
	state->in_tree = 0;
	state->private = 0;
	atomic_set(&state->refs, 1);
	init_waitqueue_head(&state->wq);
	return state;
}
EXPORT_SYMBOL(alloc_extent_state);

void free_extent_state(struct extent_state *state)
{
	if (atomic_dec_and_test(&state->refs)) {
		WARN_ON(state->in_tree);
		kmem_cache_free(extent_state_cache, state);
	}
}
EXPORT_SYMBOL(free_extent_state);

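/*
 * insert 'node' into the rb tree.  'offset' is the key used for the
 * search (callers pass the end offset of the entry being inserted),
 * and if an existing entry already covers 'offset' it is returned
 * instead of inserting.  Returns NULL on success.
 */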
static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (offset < entry->start)
			p = &(*p)->rb_left;
		else if (offset > entry->end)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 1;
	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

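/*
 * search the tree for an entry covering 'offset'.  If there is no
 * exact match and prev_ret is non-NULL, *prev_ret is set to the first
 * entry that ends at or after 'offset' (or NULL if none does).
 */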
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
				     struct rb_node **prev_ret)
{
	struct rb_node *n = root->rb_node;
	struct rb_node *prev = NULL;
	struct tree_entry *entry;
	struct tree_entry *prev_entry = NULL;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);
		prev = n;
		prev_entry = entry;

		if (offset < entry->start)
			n = n->rb_left;
		else if (offset > entry->end)
			n = n->rb_right;
		else
			return n;
	}
	if (!prev_ret)
		return NULL;
	while (prev && offset > prev_entry->end) {
		prev = rb_next(prev);
		prev_entry = rb_entry(prev, struct tree_entry, rb_node);
	}
	*prev_ret = prev;
	return NULL;
}

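/*
 * like __tree_search, but falls back to the nearest entry that ends
 * at or after 'offset' when there is no exact match.
 */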
static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{
	struct rb_node *prev;
	struct rb_node *ret;

	ret = __tree_search(root, offset, &prev);
	if (!ret)
		return prev;
	return ret;
}

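/*
 * remove the entry covering 'offset' from the tree.  Returns -ENOENT
 * if nothing in the tree covers that offset.
 */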
static int tree_delete(struct rb_root *root, u64 offset)
{
	struct rb_node *node;
	struct tree_entry *entry;

	node = __tree_search(root, offset, NULL);
	if (!node)
		return -ENOENT;
	entry = rb_entry(node, struct tree_entry, rb_node);
	entry->in_tree = 0;
	rb_erase(node, root);
	return 0;
}

/*
 * add_extent_mapping tries a simple backward merge with existing
 * mappings.  The extent_map struct passed in will be inserted into
 * the tree directly (no copies made, just a reference taken).
 */
int add_extent_mapping(struct extent_map_tree *tree,
		       struct extent_map *em)
{
	int ret = 0;
	struct extent_map *prev = NULL;
	struct rb_node *rb;

	write_lock_irq(&tree->lock);
	rb = tree_insert(&tree->map, em->end, &em->rb_node);
	if (rb) {
		prev = rb_entry(rb, struct extent_map, rb_node);
		printk("found extent map %Lu %Lu on insert of %Lu %Lu\n",
		       prev->start, prev->end, em->start, em->end);
		ret = -EEXIST;
		goto out;
	}
	atomic_inc(&em->refs);
	if (em->start != 0) {
		rb = rb_prev(&em->rb_node);
		if (rb)
			prev = rb_entry(rb, struct extent_map, rb_node);
		if (prev && prev->end + 1 == em->start &&
		    ((em->block_start == 0 && prev->block_start == 0) ||
		     (em->block_start == prev->block_end + 1))) {
			em->start = prev->start;
			em->block_start = prev->block_start;
			rb_erase(&prev->rb_node, &tree->map);
			prev->in_tree = 0;
			free_extent_map(prev);
		}
	}
out:
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(add_extent_mapping);

/*
 * lookup_extent_mapping returns the first extent_map struct in the
 * tree that intersects the [start, end] (inclusive) range.  There may
 * be additional objects in the tree that intersect, so check the object
 * returned carefully to make sure you don't need additional lookups.
 */
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
					 u64 start, u64 end)
{
	struct extent_map *em;
	struct rb_node *rb_node;

	read_lock_irq(&tree->lock);
	rb_node = tree_search(&tree->map, start);
	if (!rb_node) {
		em = NULL;
		goto out;
	}
	if (IS_ERR(rb_node)) {
		em = ERR_PTR(PTR_ERR(rb_node));
		goto out;
	}
	em = rb_entry(rb_node, struct extent_map, rb_node);
	if (em->end < start || em->start > end) {
		em = NULL;
		goto out;
	}
	atomic_inc(&em->refs);
out:
	read_unlock_irq(&tree->lock);
	return em;
}
EXPORT_SYMBOL(lookup_extent_mapping);

/*
 * removes an extent_map struct from the tree.  No reference counts are
 * dropped, and no checks are done to see if the range is in use
 */
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
{
	int ret;

	write_lock_irq(&tree->lock);
	ret = tree_delete(&tree->map, em->end);
	write_unlock_irq(&tree->lock);
	return ret;
}
EXPORT_SYMBOL(remove_extent_mapping);

/*
 * utility function to look for merge candidates inside a given range.
 * Any extents with matching state are merged together into a single
 * extent in the tree.  Extents with EXTENT_IOBITS set in their state
 * field are not merged because the end_io handlers need to be able to
 * do operations on them without sleeping (or doing allocations/splits).
 *
 * This should be called with the tree lock held.
 */
static int merge_state(struct extent_map_tree *tree,
		       struct extent_state *state)
{
	struct extent_state *other;
	struct rb_node *other_node;

	if (state->state & EXTENT_IOBITS)
		return 0;

	other_node = rb_prev(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->end == state->start - 1 &&
		    other->state == state->state) {
			state->start = other->start;
			other->in_tree = 0;
			rb_erase(&other->rb_node, &tree->state);
			free_extent_state(other);
		}
	}
	other_node = rb_next(&state->rb_node);
	if (other_node) {
		other = rb_entry(other_node, struct extent_state, rb_node);
		if (other->start == state->end + 1 &&
		    other->state == state->state) {
			other->start = state->start;
			state->in_tree = 0;
			rb_erase(&state->rb_node, &tree->state);
			free_extent_state(state);
		}
	}
	return 0;
}

/*
 * insert an extent_state struct into the tree.  'bits' are set on the
 * struct before it is inserted.
 *
 * This may return -EEXIST if the extent is already there, in which case the
 * state struct is freed.
 *
 * The tree lock is not taken internally.  This is a utility function and
 * probably isn't what you want to call (see set/clear_extent_bit).
 */
static int insert_state(struct extent_map_tree *tree,
			struct extent_state *state, u64 start, u64 end,
			int bits)
{
	struct rb_node *node;

	if (end < start) {
		printk("end < start %Lu %Lu\n", end, start);
		WARN_ON(1);
	}
	state->state |= bits;
	state->start = start;
	state->end = end;
	if ((end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", start, end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, end, &state->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, start, end);
		free_extent_state(state);
		return -EEXIST;
	}
	merge_state(tree, state);
	return 0;
}

/*
 * split a given extent state struct in two, inserting the preallocated
 * struct 'prealloc' as the newly created second half.  'split' indicates an
 * offset inside 'orig' where it should be split.
 *
 * Before calling,
 * the tree has 'orig' at [orig->start, orig->end].  After calling, there
 * are two extent state structs in the tree:
 * prealloc: [orig->start, split - 1]
 * orig: [split, orig->end]
 *
 * The tree locks are not taken by this function.  They need to be held
 * by the caller.
 */
static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
		       struct extent_state *prealloc, u64 split)
{
	struct rb_node *node;

	prealloc->start = orig->start;
	prealloc->end = split - 1;
	prealloc->state = orig->state;
	orig->start = split;
	if ((prealloc->end & 4095) == 0) {
		printk("insert state %Lu %Lu strange end\n", prealloc->start,
		       prealloc->end);
		WARN_ON(1);
	}
	node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
	if (node) {
		struct extent_state *found;
		found = rb_entry(node, struct extent_state, rb_node);
		printk("found node %Lu %Lu on insert of %Lu %Lu\n",
		       found->start, found->end, prealloc->start,
		       prealloc->end);
		free_extent_state(prealloc);
		return -EEXIST;
	}
	return 0;
}

/*
 * utility function to clear some bits in an extent state struct.
 * it will optionally wake up anyone waiting on this state (wake == 1), or
 * forcibly remove the state from the tree (delete == 1).
 *
 * If no bits are set on the state struct after clearing things, the
 * struct is freed and removed from the tree
 */
static int clear_state_bit(struct extent_map_tree *tree,
			   struct extent_state *state, int bits, int wake,
			   int delete)
{
	int ret = state->state & bits;

	state->state &= ~bits;
	if (wake)
		wake_up(&state->wq);
	if (delete || state->state == 0) {
		if (state->in_tree) {
			rb_erase(&state->rb_node, &tree->state);
			state->in_tree = 0;
			free_extent_state(state);
		} else {
			WARN_ON(1);
		}
	} else {
		merge_state(tree, state);
	}
	return ret;
}

/*
 * clear some bits on a range in the tree.  This may require splitting
 * or inserting elements in the tree, so the gfp mask is used to
 * indicate which allocations or sleeping are allowed.
 *
 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
 * the given range from the tree regardless of state (i.e. for truncate).
 *
 * the range [start, end] is inclusive.
 *
 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
 * bits were already set, or zero if none of the bits were already set.
 */
int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
		     int bits, int wake, int delete, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err;
	int set = 0;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irq(&tree->lock);
	/*
	 * this search will find the extents that end after
	 * our range starts
	 */
	node = tree_search(&tree->state, start);
	if (!node)
		goto out;
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start > end)
		goto out;
	WARN_ON(state->end < start);

	/*
	 *     | ---- desired range ---- |
	 *  | state | or
	 *  |------------- state --------------|
	 *
	 * We need to split the extent we found, and may flip
	 * bits on second half.
	 *
	 * If the extent we found extends past our range, we
	 * just split and search again.  It'll get split again
	 * the next time though.
	 *
	 * If the extent we found is inside our range, we clear
	 * the desired bit on it.
	 */
	if (state->start < start) {
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			start = state->end + 1;
			set |= clear_state_bit(tree, state, bits,
					       wake, delete);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and clear the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		if (wake)
			wake_up(&state->wq);
		set |= clear_state_bit(tree, prealloc, bits,
				       wake, delete);
		prealloc = NULL;
		goto out;
	}

	start = state->end + 1;
	set |= clear_state_bit(tree, state, bits, wake, delete);
	goto search_again;

out:
	write_unlock_irq(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return set;

search_again:
	if (start >= end)
		goto out;
	write_unlock_irq(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(clear_extent_bit);

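/*
 * sleep until 'state' is woken (clear_state_bit does the wake_up).
 * The tree read lock is dropped across the schedule and retaken
 * afterwards, so the caller must revalidate whatever it found under
 * the old lock.
 */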
static int wait_on_state(struct extent_map_tree *tree,
			 struct extent_state *state)
{
	DEFINE_WAIT(wait);
	prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
	read_unlock_irq(&tree->lock);
	schedule();
	read_lock_irq(&tree->lock);
	finish_wait(&state->wq, &wait);
	return 0;
}

/*
 * waits for one or more bits to clear on a range in the state tree.
 * The range [start, end] is inclusive.
 * The tree lock is taken by this function
 */
int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
{
	struct extent_state *state;
	struct rb_node *node;

	read_lock_irq(&tree->lock);
again:
	while (1) {
		/*
		 * this search will find all the extents that end after
		 * our range starts
		 */
		node = tree_search(&tree->state, start);
		if (!node)
			break;

		state = rb_entry(node, struct extent_state, rb_node);

		if (state->start > end)
			goto out;

		if (state->state & bits) {
			start = state->start;
			atomic_inc(&state->refs);
			wait_on_state(tree, state);
			free_extent_state(state);
			goto again;
		}
		start = state->end + 1;

		if (start > end)
			break;

		if (need_resched()) {
			read_unlock_irq(&tree->lock);
			cond_resched();
			read_lock_irq(&tree->lock);
		}
	}
out:
	read_unlock_irq(&tree->lock);
	return 0;
}
EXPORT_SYMBOL(wait_extent_bit);

/*
 * set some bits on a range in the tree.  This may require allocations
 * or sleeping, so the gfp mask is used to indicate what is allowed.
 *
 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
 * range already has the desired bits set.  The start of the existing
 * range is returned in failed_start in this case.
 *
 * [start, end] is inclusive
 * This takes the tree lock.
 */
int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
		   int exclusive, u64 *failed_start, gfp_t mask)
{
	struct extent_state *state;
	struct extent_state *prealloc = NULL;
	struct rb_node *node;
	int err = 0;
	int set;
	u64 last_start;
	u64 last_end;

again:
	if (!prealloc && (mask & __GFP_WAIT)) {
		prealloc = alloc_extent_state(mask);
		if (!prealloc)
			return -ENOMEM;
	}

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node) {
		err = insert_state(tree, prealloc, start, end, bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		goto out;
	}

	state = rb_entry(node, struct extent_state, rb_node);
	last_start = state->start;
	last_end = state->end;

	/*
	 * | ---- desired range ---- |
	 * | state |
	 *
	 * Just lock what we found and keep going
	 */
	if (state->start == start && state->end <= end) {
		set = state->state & bits;
		if (set && exclusive) {
			*failed_start = state->start;
			err = -EEXIST;
			goto out;
		}
		state->state |= bits;
		start = state->end + 1;
		merge_state(tree, state);
		goto search_again;
	}

	/*
	 *     | ---- desired range ---- |
	 * | state |
	 *   or
	 * |------------- state --------------|
	 *
	 * We need to split the extent we found, and may flip bits on
	 * second half.
	 *
	 * If the extent we found extends past our
	 * range, we just split and search again.  It'll get split
	 * again the next time though.
	 *
	 * If the extent we found is inside our range, we set the
	 * desired bit on it.
	 */
	if (state->start < start) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, start);
		BUG_ON(err == -EEXIST);
		prealloc = NULL;
		if (err)
			goto out;
		if (state->end <= end) {
			state->state |= bits;
			start = state->end + 1;
			merge_state(tree, state);
		} else {
			start = state->start;
		}
		goto search_again;
	}
	/*
	 * | ---- desired range ---- |
	 *                        | state |
	 * We need to split the extent, and set the bit
	 * on the first half
	 */
	if (state->start <= end && state->end > end) {
		set = state->state & bits;
		if (exclusive && set) {
			*failed_start = start;
			err = -EEXIST;
			goto out;
		}
		err = split_state(tree, state, prealloc, end + 1);
		BUG_ON(err == -EEXIST);

		prealloc->state |= bits;
		merge_state(tree, prealloc);
		prealloc = NULL;
		goto out;
	}

	/*
	 * | ---- desired range ---- |
	 *     | state | or               | state |
	 *
	 * There's a hole, we need to insert something in it and
	 * ignore the extent we found.
	 */
	if (state->start > start) {
		u64 this_end;
		if (end < last_start)
			this_end = end;
		else
			this_end = last_start - 1;
		err = insert_state(tree, prealloc, start, this_end,
				   bits);
		prealloc = NULL;
		BUG_ON(err == -EEXIST);
		if (err)
			goto out;
		start = this_end + 1;
		goto search_again;
	}
	goto search_again;

out:
	write_unlock_irq(&tree->lock);
	if (prealloc)
		free_extent_state(prealloc);

	return err;

search_again:
	if (start > end)
		goto out;
	write_unlock_irq(&tree->lock);
	if (mask & __GFP_WAIT)
		cond_resched();
	goto again;
}
EXPORT_SYMBOL(set_extent_bit);

/* wrappers around set/clear extent bit */
int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_dirty);

int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end,
			      EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_delalloc);

int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
		       gfp_t mask)
{
	return clear_extent_bit(tree, start, end,
				EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_dirty);

int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		   gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_new);

int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
		     gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_new);

int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
			      mask);
}
EXPORT_SYMBOL(set_extent_uptodate);

int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
			  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
}
EXPORT_SYMBOL(clear_extent_uptodate);

int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			 gfp_t mask)
{
	return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
			      0, NULL, mask);
}
EXPORT_SYMBOL(set_extent_writeback);

int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
			   gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
}
EXPORT_SYMBOL(clear_extent_writeback);

int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
}
EXPORT_SYMBOL(wait_on_extent_writeback);

/*
 * locks a range in ascending order, waiting for any locked regions
 * it hits on the way.  [start, end] are inclusive, and this will sleep.
 */
int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
{
	int err;
	u64 failed_start;

	while (1) {
		err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
				     &failed_start, mask);
		if (err == -EEXIST && (mask & __GFP_WAIT)) {
			wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
			start = failed_start;
		} else {
			break;
		}
		WARN_ON(start > end);
	}
	return err;
}
EXPORT_SYMBOL(lock_extent);

int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
		  gfp_t mask)
{
	return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
}
EXPORT_SYMBOL(unlock_extent);

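/*
 * Illustrative usage (a sketch, not taken from a real caller):
 *
 *	lock_extent(tree, start, end, GFP_NOFS);
 *	... start IO on the pages covering [start, end] ...
 *	(the end_io handler later calls unlock_extent with GFP_ATOMIC)
 *
 * lock_extent may sleep waiting for a conflicting lock, so it must
 * not be called from interrupt context.
 */
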
/*
 * helper function to set pages and extents in the tree dirty
 */
int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		__set_page_dirty_nobuffers(page);
		page_cache_release(page);
		index++;
	}
	set_extent_dirty(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_dirty);

/*
 * helper function to set both pages and extents in the tree writeback
 */
int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		BUG_ON(!page);
		set_page_writeback(page);
		page_cache_release(page);
		index++;
	}
	set_extent_writeback(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(set_range_writeback);

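/*
 * find a contiguous run of EXTENT_DELALLOC state records starting at
 * 'start', locking those at or beyond 'lock_start'.  *end is set to
 * the last byte of the run and the number of records found is
 * returned.  The walk stops once roughly max_bytes worth of extents
 * have been visited.
 */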
u64 find_lock_delalloc_range(struct extent_map_tree *tree,
			     u64 start, u64 lock_start, u64 *end,
			     u64 max_bytes)
{
	struct rb_node *node;
	struct extent_state *state;
	u64 cur_start = start;
	u64 found = 0;
	u64 total_bytes = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
search_again:
	node = tree_search(&tree->state, cur_start);
	if (!node || IS_ERR(node)) {
		goto out;
	}

	while (1) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start != cur_start) {
			goto out;
		}
		if (!(state->state & EXTENT_DELALLOC)) {
			goto out;
		}
		if (state->start >= lock_start) {
			if (state->state & EXTENT_LOCKED) {
				DEFINE_WAIT(wait);
				atomic_inc(&state->refs);
				prepare_to_wait(&state->wq, &wait,
						TASK_UNINTERRUPTIBLE);
				write_unlock_irq(&tree->lock);
				schedule();
				write_lock_irq(&tree->lock);
				finish_wait(&state->wq, &wait);
				free_extent_state(state);
				goto search_again;
			}
			state->state |= EXTENT_LOCKED;
		}
		found++;
		*end = state->end;
		cur_start = state->end + 1;
		node = rb_next(node);
		if (!node)
			break;
		total_bytes = state->end - state->start + 1;
		if (total_bytes >= max_bytes)
			break;
	}
out:
	write_unlock_irq(&tree->lock);
	return found;
}

/*
 * helper function to lock both pages and extents in the tree.
 * pages must be locked first.
 */
int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;
	int err;

	while (index <= end_index) {
		page = grab_cache_page(tree->mapping, index);
		if (!page) {
			err = -ENOMEM;
			goto failed;
		}
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			goto failed;
		}
		index++;
	}
	lock_extent(tree, start, end, GFP_NOFS);
	return 0;

failed:
	/*
	 * we failed above in getting the page at 'index', so we undo here
	 * up to but not including the page at 'index'
	 */
	end_index = index;
	index = start >> PAGE_CACHE_SHIFT;
	while (index < end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	return err;
}
EXPORT_SYMBOL(lock_range);

/*
 * helper function to unlock both pages and extents in the tree.
 */
int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
{
	unsigned long index = start >> PAGE_CACHE_SHIFT;
	unsigned long end_index = end >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (index <= end_index) {
		page = find_get_page(tree->mapping, index);
		unlock_page(page);
		page_cache_release(page);
		index++;
	}
	unlock_extent(tree, start, end, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(unlock_range);

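/*
 * set_state_private/get_state_private stash a caller-supplied u64 in
 * the extent_state record that starts exactly at 'start'.  The tree
 * code never interprets the value; what it means is up to the caller.
 */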
int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	write_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	state->private = private;
out:
	write_unlock_irq(&tree->lock);
	return ret;
}

int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
{
	struct rb_node *node;
	struct extent_state *state;
	int ret = 0;

	read_lock_irq(&tree->lock);
	/*
	 * this search will find all the extents that end after
	 * our range starts.
	 */
	node = tree_search(&tree->state, start);
	if (!node || IS_ERR(node)) {
		ret = -ENOENT;
		goto out;
	}
	state = rb_entry(node, struct extent_state, rb_node);
	if (state->start != start) {
		ret = -ENOENT;
		goto out;
	}
	*private = state->private;
out:
	read_unlock_irq(&tree->lock);
	return ret;
}

/*
 * searches a range in the state tree for a given mask.
 * If 'filled' == 1, this returns 1 only if every extent in the tree
 * has the bits set.  Otherwise, 1 is returned if any bit in the
 * range is found set.
 */
static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
			  int bits, int filled)
{
	struct extent_state *state = NULL;
	struct rb_node *node;
	int bitset = 0;

	read_lock_irq(&tree->lock);
	node = tree_search(&tree->state, start);
	while (node && start <= end) {
		state = rb_entry(node, struct extent_state, rb_node);
		if (state->start > end)
			break;

		if (filled && state->start > start) {
			bitset = 0;
			break;
		}
		if (state->state & bits) {
			bitset = 1;
			if (!filled)
				break;
		} else if (filled) {
			bitset = 0;
			break;
		}
		start = state->end + 1;
		if (start > end)
			break;
		node = rb_next(node);
	}
	read_unlock_irq(&tree->lock);
	return bitset;
}

/*
 * helper function to set a given page up to date if all the
 * extents in the tree for that page are up to date
 */
static int check_page_uptodate(struct extent_map_tree *tree,
			       struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;

	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
		SetPageUptodate(page);
	return 0;
}

/*
 * helper function to unlock a page if all the extents in the tree
 * for that page are unlocked
 */
static int check_page_locked(struct extent_map_tree *tree,
			     struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;

	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
		unlock_page(page);
	return 0;
}

/*
 * helper function to end page writeback if all the extents
 * in the tree for that page are done with writeback
 */
static int check_page_writeback(struct extent_map_tree *tree,
				struct page *page)
{
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;

	if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
		end_page_writeback(page);
	return 0;
}

/* lots and lots of room for performance fixes in the end_bio funcs */

/*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
 * clear the writeback bits in the extent tree for this IO
 * end_page_writeback if the page has no more pending IO
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_writepage(struct bio *bio,
				    unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (!uptodate) {
			clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
			ClearPageUptodate(page);
			SetPageError(page);
		}
		clear_extent_writeback(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			end_page_writeback(page);
		else
			check_page_writeback(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * after a readpage IO is done, we need to:
 * clear the uptodate bits on error
 * set the uptodate bits if things worked
 * set the page up to date if all extents in the tree are uptodate
 * clear the lock bit in the extent tree
 * unlock the page if there are no other extents locked for it
 *
 * Scheduling is not allowed, so the extent state tree is expected
 * to have one and only one object corresponding to this IO.
 */
static int end_bio_extent_readpage(struct bio *bio,
				   unsigned int bytes_done, int err)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;
	int whole_page;
	int ret;
	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
			whole_page = 1;
		else
			whole_page = 0;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);
		if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
			ret = tree->ops->readpage_end_io_hook(page, start, end);
			if (ret)
				uptodate = 0;
		}
		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
			if (whole_page)
				SetPageUptodate(page);
			else
				check_page_uptodate(tree, page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);

		if (whole_page)
			unlock_page(page);
		else
			check_page_locked(tree, page);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

/*
 * IO done from prepare_write is pretty simple, we just unlock
 * the structs in the extent tree when done, and set the uptodate bits
 * as appropriate.
 */
static int end_bio_extent_preparewrite(struct bio *bio,
				       unsigned int bytes_done, int err)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct extent_map_tree *tree = bio->bi_private;
	u64 start;
	u64 end;

	if (bio->bi_size)
		return 1;

	do {
		struct page *page = bvec->bv_page;
		start = ((u64)page->index << PAGE_CACHE_SHIFT) +
			bvec->bv_offset;
		end = start + bvec->bv_len - 1;

		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		if (uptodate) {
			set_extent_uptodate(tree, start, end, GFP_ATOMIC);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}

		unlock_extent(tree, start, end, GFP_ATOMIC);
	} while (bvec >= bio->bi_io_vec);

	bio_put(bio);
	return 0;
}

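/*
 * build and submit a single-page bio for the given sector.  The tree
 * is passed through bi_private so the end_io handlers above can find
 * it.  Returns -EOPNOTSUPP if the device rejected the bio.
 */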
static int submit_extent_page(int rw, struct extent_map_tree *tree,
			      struct page *page, sector_t sector,
			      size_t size, unsigned long offset,
			      struct block_device *bdev,
			      bio_end_io_t end_io_func)
{
	struct bio *bio;
	int ret = 0;

	bio = bio_alloc(GFP_NOIO, 1);

	bio->bi_sector = sector;
	bio->bi_bdev = bdev;
	bio->bi_io_vec[0].bv_page = page;
	bio->bi_io_vec[0].bv_len = size;
	bio->bi_io_vec[0].bv_offset = offset;

	bio->bi_vcnt = 1;
	bio->bi_idx = 0;
	bio->bi_size = size;

	bio->bi_end_io = end_io_func;
	bio->bi_private = tree;

	bio_get(bio);
	submit_bio(rw, bio);

	if (bio_flagged(bio, BIO_EOPNOTSUPP))
		ret = -EOPNOTSUPP;

	bio_put(bio);
	return ret;
}

/*
 * basic readpage implementation.  Locked extent state structs are inserted
 * into the tree that are removed when the IO is done (by the end_io
 * handlers)
 */
int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
			  get_extent_t *get_extent)
{
	struct inode *inode = page->mapping->host;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	u64 cur_end;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize = inode->i_sb->s_blocksize;

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		set_page_private(page, 1);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		page_cache_get(page);
	}

	end = page_end;
	lock_extent(tree, start, end, GFP_NOFS);

	while (cur <= end) {
		if (cur >= last_byte) {
			iosize = PAGE_CACHE_SIZE - page_offset;
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			unlock_extent(tree, cur, end, GFP_NOFS);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);
		iosize = min(em->end - cur, end - cur) + 1;
		cur_end = min(em->end, end);
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		/* we've found a hole, just zero and go on */
		if (block_start == 0) {
			zero_user_page(page, page_offset, iosize, KM_USER0);
			set_extent_uptodate(tree, cur, cur + iosize - 1,
					    GFP_NOFS);
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		/* the get_extent function already copied into the page */
		if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
			unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		ret = 0;
		if (tree->ops && tree->ops->readpage_io_hook) {
			ret = tree->ops->readpage_io_hook(page, cur,
							  cur + iosize - 1);
		}
		if (!ret) {
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 bdev, end_bio_extent_readpage);
		}
		if (ret)
			SetPageError(page);
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
	if (!nr) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
	}
	return 0;
}
EXPORT_SYMBOL(extent_read_full_page);

/*
 * the writepage semantics are similar to regular writepage.  extent
 * records are inserted to lock ranges in the tree, and as dirty areas
 * are found, they are marked writeback.  Then the lock bits are removed
 * and the end_io handler clears the writeback ranges
 */
int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
			   get_extent_t *get_extent,
			   struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 page_end = start + PAGE_CACHE_SIZE - 1;
	u64 end;
	u64 cur = start;
	u64 extent_offset;
	u64 last_byte = i_size_read(inode);
	u64 block_start;
	sector_t sector;
	struct extent_map *em;
	struct block_device *bdev;
	int ret;
	int nr = 0;
	size_t page_offset = 0;
	size_t iosize;
	size_t blocksize;
	loff_t i_size = i_size_read(inode);
	unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
	u64 nr_delalloc;
	u64 delalloc_end;

	WARN_ON(!PageLocked(page));
	if (page->index > end_index) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		unlock_page(page);
		return 0;
	}

	if (page->index == end_index) {
		size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
		zero_user_page(page, offset,
			       PAGE_CACHE_SIZE - offset, KM_USER0);
	}

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		set_page_private(page, 1);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		page_cache_get(page);
	}

	lock_extent(tree, start, page_end, GFP_NOFS);
	nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
					       &delalloc_end,
					       128 * 1024 * 1024);
	if (nr_delalloc) {
		tree->ops->fill_delalloc(inode, start, delalloc_end);
		if (delalloc_end >= page_end + 1) {
			clear_extent_bit(tree, page_end + 1, delalloc_end,
					 EXTENT_LOCKED | EXTENT_DELALLOC,
					 1, 0, GFP_NOFS);
		}
		clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
				 0, 0, GFP_NOFS);
		if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
			printk("found delalloc bits after clear extent_bit\n");
		}
	} else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after find_delalloc_range returns 0\n");
	}

	end = page_end;
	if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
		printk("found delalloc bits after lock_extent\n");
	}
	if (last_byte <= start) {
		clear_extent_dirty(tree, start, page_end, GFP_NOFS);
		goto done;
	}

	set_extent_uptodate(tree, start, page_end, GFP_NOFS);
	blocksize = inode->i_sb->s_blocksize;

	while (cur <= end) {
		if (cur >= last_byte) {
			clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
			break;
		}
		em = get_extent(inode, page, page_offset, cur, end, 0);
		if (IS_ERR(em) || !em) {
			SetPageError(page);
			break;
		}

		extent_offset = cur - em->start;
		BUG_ON(em->end < cur);
		BUG_ON(end < cur);
		iosize = min(em->end - cur, end - cur) + 1;
		iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
		sector = (em->block_start + extent_offset) >> 9;
		bdev = em->bdev;
		block_start = em->block_start;
		free_extent_map(em);
		em = NULL;

		if (block_start == 0 || block_start == EXTENT_MAP_INLINE) {
			clear_extent_dirty(tree, cur,
					   cur + iosize - 1, GFP_NOFS);
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}

		/* leave this out until we have a page_mkwrite call */
		if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
					 EXTENT_DIRTY, 0)) {
			cur = cur + iosize;
			page_offset += iosize;
			continue;
		}
		clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
		ret = tree->ops->writepage_io_hook(page, cur, cur + iosize - 1);
		if (ret)
			SetPageError(page);
		else {
			set_range_writeback(tree, cur, cur + iosize - 1);
			ret = submit_extent_page(WRITE, tree, page, sector,
						 iosize, page_offset, bdev,
						 end_bio_extent_writepage);
			if (ret)
				SetPageError(page);
		}
		cur = cur + iosize;
		page_offset += iosize;
		nr++;
	}
done:
	WARN_ON(test_range_bit(tree, start, page_end, EXTENT_DIRTY, 0));
	unlock_extent(tree, start, page_end, GFP_NOFS);
	unlock_page(page);
	return 0;
}
EXPORT_SYMBOL(extent_write_full_page);

/*
 * basic invalidatepage code, this waits on any locked or writeback
 * ranges corresponding to the page, and then deletes any extent state
 * records from the tree
 */
int extent_invalidatepage(struct extent_map_tree *tree,
			  struct page *page, unsigned long offset)
{
	u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
	u64 end = start + PAGE_CACHE_SIZE - 1;
	size_t blocksize = page->mapping->host->i_sb->s_blocksize;

	start += (offset + blocksize - 1) & ~(blocksize - 1);
	if (start > end)
		return 0;

	lock_extent(tree, start, end, GFP_NOFS);
	wait_on_extent_writeback(tree, start, end);
	clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY,
			 1, 1, GFP_NOFS);
	return 0;
}
EXPORT_SYMBOL(extent_invalidatepage);

/*
 * simple commit_write call, set_range_dirty is used to mark both
 * the pages and the extent records as dirty
 */
int extent_commit_write(struct extent_map_tree *tree,
			struct inode *inode, struct page *page,
			unsigned from, unsigned to)
{
	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		set_page_private(page, 1);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		page_cache_get(page);
	}

	set_page_dirty(page);

	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}
	return 0;
}
EXPORT_SYMBOL(extent_commit_write);

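/*
 * prepare_write helper.  For each block touched by [from, to), this
 * either zeros the parts of a newly allocated block that the write
 * will not cover, reads in old blocks that are only partially
 * overwritten, or marks fully covered blocks uptodate.  Any reads
 * started here are waited on before returning.
 */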
int extent_prepare_write(struct extent_map_tree *tree,
			 struct inode *inode, struct page *page,
			 unsigned from, unsigned to, get_extent_t *get_extent)
{
	u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
	u64 block_start;
	u64 orig_block_start;
	u64 block_end;
	u64 cur_end;
	struct extent_map *em;
	unsigned blocksize = 1 << inode->i_blkbits;
	size_t page_offset = 0;
	size_t block_off_start;
	size_t block_off_end;
	int err = 0;
	int iocount = 0;
	int ret = 0;
	int isnew;

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		set_page_private(page, 1);
		WARN_ON(!page->mapping->a_ops->invalidatepage);
		page_cache_get(page);
	}

	block_start = (page_start + from) & ~((u64)blocksize - 1);
	block_end = (page_start + to - 1) | (blocksize - 1);
	orig_block_start = block_start;

	lock_extent(tree, page_start, page_end, GFP_NOFS);
	while (block_start <= block_end) {
		em = get_extent(inode, page, page_offset, block_start,
				block_end, 1);
		if (IS_ERR(em) || !em) {
			goto err;
		}
		cur_end = min(block_end, em->end);
		block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
		block_off_end = block_off_start + blocksize;
		isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);

		if (!PageUptodate(page) && isnew &&
		    (block_off_end > to || block_off_start < from)) {
			void *kaddr;

			kaddr = kmap_atomic(page, KM_USER0);
			if (block_off_end > to)
				memset(kaddr + to, 0, block_off_end - to);
			if (block_off_start < from)
				memset(kaddr + block_off_start, 0,
				       from - block_off_start);
			flush_dcache_page(page);
			kunmap_atomic(kaddr, KM_USER0);
		}
		if (!isnew && !PageUptodate(page) &&
		    (block_off_end > to || block_off_start < from) &&
		    !test_range_bit(tree, block_start, cur_end,
				    EXTENT_UPTODATE, 1)) {
			u64 sector;
			u64 extent_offset = block_start - em->start;
			size_t iosize;
			sector = (em->block_start + extent_offset) >> 9;
			iosize = (cur_end - block_start + blocksize - 1) &
				 ~((u64)blocksize - 1);
			/*
			 * we've already got the extent locked, but we
			 * need to split the state such that our end_bio
			 * handler can clear the lock.
			 */
			set_extent_bit(tree, block_start,
				       block_start + iosize - 1,
				       EXTENT_LOCKED, 0, NULL, GFP_NOFS);
			ret = submit_extent_page(READ, tree, page,
						 sector, iosize, page_offset,
						 em->bdev,
						 end_bio_extent_preparewrite);
			iocount++;
			block_start = block_start + iosize;
		} else {
			set_extent_uptodate(tree, block_start, cur_end,
					    GFP_NOFS);
			unlock_extent(tree, block_start, cur_end, GFP_NOFS);
			block_start = cur_end + 1;
		}
		page_offset = block_start & (PAGE_CACHE_SIZE - 1);
		free_extent_map(em);
	}
	if (iocount) {
		wait_extent_bit(tree, orig_block_start,
				block_end, EXTENT_LOCKED);
	}
	check_page_uptodate(tree, page);
err:
	/* FIXME, zero out newly allocated blocks on error */
	return err;
}
EXPORT_SYMBOL(extent_prepare_write);

/*
 * a helper for releasepage.  As long as there are no locked extents
 * in the range corresponding to the page, both state records and extent
 * map records are removed
 */
int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
{
	struct extent_map *em;
	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
	u64 end = start + PAGE_CACHE_SIZE - 1;
	u64 orig_start = start;
	int ret = 1;

	while (start <= end) {
		em = lookup_extent_mapping(tree, start, end);
		if (!em || IS_ERR(em))
			break;
		if (!test_range_bit(tree, em->start, em->end,
				    EXTENT_LOCKED, 0)) {
			remove_extent_mapping(tree, em);
			/* once for the rb tree */
			free_extent_map(em);
		}
		start = em->end + 1;
		/* once for us */
		free_extent_map(em);
	}
	if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
		ret = 0;
	else
		clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
				 1, 1, GFP_NOFS);
	return ret;
}
EXPORT_SYMBOL(try_release_extent_mapping);