2008-07-14 20:08:37 +04:00
/*
* This file is part of UBIFS .
*
* Copyright ( C ) 2006 - 2008 Nokia Corporation .
*
* This program is free software ; you can redistribute it and / or modify it
* under the terms of the GNU General Public License version 2 as published by
* the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful , but WITHOUT
* ANY WARRANTY ; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE . See the GNU General Public License for
* more details .
*
* You should have received a copy of the GNU General Public License along with
* this program ; if not , write to the Free Software Foundation , Inc . , 51
* Franklin St , Fifth Floor , Boston , MA 02110 - 1301 USA
*
* Authors : Artem Bityutskiy ( Б и т ю ц к и й А р т ё м )
* Adrian Hunter
*/
/*
* This file implements UBIFS shrinker which evicts clean znodes from the TNC
* tree when Linux VM needs more RAM .
*
* We do not implement any LRU lists to find oldest znodes to free because it
* would add additional overhead to the file system fast paths . So the shrinker
* just walks the TNC tree when searching for znodes to free .
*
* If the root of a TNC sub - tree is clean and old enough , then the children are
* also clean and old enough . So the shrinker walks the TNC in level order and
* dumps entire sub - trees .
*
* The age of znodes is just the time - stamp when they were last looked at .
* The current shrinker first tries to evict old znodes , then young ones .
*
* Since the shrinker is global , it has to protect against races with FS
* un - mounts , which is done by the ' ubifs_infos_lock ' and ' c - > umount_mutex ' .
*/
# include "ubifs.h"
/* List of all UBIFS file-system instances */
LIST_HEAD ( ubifs_infos ) ;
/*
* We number each shrinker run and record the number on the ubifs_info structure
* so that we can easily work out which ubifs_info structures have already been
* done by the current run .
*/
static unsigned int shrinker_run_no ;
/* Protects 'ubifs_infos' list */
DEFINE_SPINLOCK ( ubifs_infos_lock ) ;
/* Global clean znode counter (for all mounted UBIFS instances) */
atomic_long_t ubifs_clean_zn_cnt ;
/**
* shrink_tnc - shrink TNC tree .
* @ c : UBIFS file - system description object
* @ nr : number of znodes to free
* @ age : the age of znodes to free
* @ contention : if any contention , this is set to % 1
*
* This function traverses TNC tree and frees clean znodes . It does not free
* clean znodes which younger then @ age . Returns number of freed znodes .
*/
static int shrink_tnc ( struct ubifs_info * c , int nr , int age , int * contention )
{
int total_freed = 0 ;
struct ubifs_znode * znode , * zprev ;
int time = get_seconds ( ) ;
ubifs_assert ( mutex_is_locked ( & c - > umount_mutex ) ) ;
ubifs_assert ( mutex_is_locked ( & c - > tnc_mutex ) ) ;
if ( ! c - > zroot . znode | | atomic_long_read ( & c - > clean_zn_cnt ) = = 0 )
return 0 ;
/*
* Traverse the TNC tree in levelorder manner , so that it is possible
* to destroy large sub - trees . Indeed , if a znode is old , then all its
* children are older or of the same age .
*
* Note , we are holding ' c - > tnc_mutex ' , so we do not have to lock the
* ' c - > space_lock ' when _reading_ ' c - > clean_zn_cnt ' , because it is
* changed only when the ' c - > tnc_mutex ' is held .
*/
zprev = NULL ;
znode = ubifs_tnc_levelorder_next ( c - > zroot . znode , NULL ) ;
while ( znode & & total_freed < nr & &
atomic_long_read ( & c - > clean_zn_cnt ) > 0 ) {
int freed ;
/*
* If the znode is clean , but it is in the ' c - > cnext ' list , this
* means that this znode has just been written to flash as a
* part of commit and was marked clean . They will be removed
* from the list at end commit . We cannot change the list ,
* because it is not protected by any mutex ( design decision to
* make commit really independent and parallel to main I / O ) . So
* we just skip these znodes .
*
* Note , the ' clean_zn_cnt ' counters are not updated until
* after the commit , so the UBIFS shrinker does not report
* the znodes which are in the ' c - > cnext ' list as freeable .
*
* Also note , if the root of a sub - tree is not in ' c - > cnext ' ,
* then the whole sub - tree is not in ' c - > cnext ' as well , so it
* is safe to dump whole sub - tree .
*/
if ( znode - > cnext ) {
/*
* Very soon these znodes will be removed from the list
* and become freeable .
*/
* contention = 1 ;
} else if ( ! ubifs_zn_dirty ( znode ) & &
abs ( time - znode - > time ) > = age ) {
if ( znode - > parent )
znode - > parent - > zbranch [ znode - > iip ] . znode = NULL ;
else
c - > zroot . znode = NULL ;
freed = ubifs_destroy_tnc_subtree ( znode ) ;
atomic_long_sub ( freed , & ubifs_clean_zn_cnt ) ;
atomic_long_sub ( freed , & c - > clean_zn_cnt ) ;
ubifs_assert ( atomic_long_read ( & c - > clean_zn_cnt ) > = 0 ) ;
total_freed + = freed ;
znode = zprev ;
}
if ( unlikely ( ! c - > zroot . znode ) )
break ;
zprev = znode ;
znode = ubifs_tnc_levelorder_next ( c - > zroot . znode , znode ) ;
cond_resched ( ) ;
}
return total_freed ;
}
/**
* shrink_tnc_trees - shrink UBIFS TNC trees .
* @ nr : number of znodes to free
* @ age : the age of znodes to free
* @ contention : if any contention , this is set to % 1
*
* This function walks the list of mounted UBIFS file - systems and frees clean
2008-10-16 21:02:37 +04:00
* znodes which are older than @ age , until at least @ nr znodes are freed .
2008-07-14 20:08:37 +04:00
* Returns the number of freed znodes .
*/
static int shrink_tnc_trees ( int nr , int age , int * contention )
{
struct ubifs_info * c ;
struct list_head * p ;
unsigned int run_no ;
int freed = 0 ;
spin_lock ( & ubifs_infos_lock ) ;
do {
run_no = + + shrinker_run_no ;
} while ( run_no = = 0 ) ;
/* Iterate over all mounted UBIFS file-systems and try to shrink them */
p = ubifs_infos . next ;
while ( p ! = & ubifs_infos ) {
c = list_entry ( p , struct ubifs_info , infos_list ) ;
/*
* We move the ones we do to the end of the list , so we stop
* when we see one we have already done .
*/
if ( c - > shrinker_run_no = = run_no )
break ;
if ( ! mutex_trylock ( & c - > umount_mutex ) ) {
/* Some un-mount is in progress, try next FS */
* contention = 1 ;
p = p - > next ;
continue ;
}
/*
* We ' re holding ' c - > umount_mutex ' , so the file - system won ' t go
* away .
*/
if ( ! mutex_trylock ( & c - > tnc_mutex ) ) {
mutex_unlock ( & c - > umount_mutex ) ;
* contention = 1 ;
p = p - > next ;
continue ;
}
spin_unlock ( & ubifs_infos_lock ) ;
/*
* OK , now we have TNC locked , the file - system cannot go away -
* it is safe to reap the cache .
*/
c - > shrinker_run_no = run_no ;
freed + = shrink_tnc ( c , nr , age , contention ) ;
mutex_unlock ( & c - > tnc_mutex ) ;
spin_lock ( & ubifs_infos_lock ) ;
/* Get the next list element before we move this one */
p = p - > next ;
/*
* Move this one to the end of the list to provide some
* fairness .
*/
2009-02-13 11:13:11 +03:00
list_move_tail ( & c - > infos_list , & ubifs_infos ) ;
2008-07-14 20:08:37 +04:00
mutex_unlock ( & c - > umount_mutex ) ;
if ( freed > = nr )
break ;
}
spin_unlock ( & ubifs_infos_lock ) ;
return freed ;
}
/**
* kick_a_thread - kick a background thread to start commit .
*
* This function kicks a background thread to start background commit . Returns
* % - 1 if a thread was kicked or there is another reason to assume the memory
* will soon be freed or become freeable . If there are no dirty znodes , returns
* % 0.
*/
static int kick_a_thread ( void )
{
int i ;
struct ubifs_info * c ;
/*
* Iterate over all mounted UBIFS file - systems and find out if there is
* already an ongoing commit operation there . If no , then iterate for
* the second time and initiate background commit .
*/
spin_lock ( & ubifs_infos_lock ) ;
for ( i = 0 ; i < 2 ; i + + ) {
list_for_each_entry ( c , & ubifs_infos , infos_list ) {
long dirty_zn_cnt ;
if ( ! mutex_trylock ( & c - > umount_mutex ) ) {
/*
* Some un - mount is in progress , it will
* certainly free memory , so just return .
*/
spin_unlock ( & ubifs_infos_lock ) ;
return - 1 ;
}
dirty_zn_cnt = atomic_long_read ( & c - > dirty_zn_cnt ) ;
if ( ! dirty_zn_cnt | | c - > cmt_state = = COMMIT_BROKEN | |
c - > ro_media ) {
mutex_unlock ( & c - > umount_mutex ) ;
continue ;
}
if ( c - > cmt_state ! = COMMIT_RESTING ) {
spin_unlock ( & ubifs_infos_lock ) ;
mutex_unlock ( & c - > umount_mutex ) ;
return - 1 ;
}
if ( i = = 1 ) {
2009-02-13 11:13:11 +03:00
list_move_tail ( & c - > infos_list , & ubifs_infos ) ;
2008-07-14 20:08:37 +04:00
spin_unlock ( & ubifs_infos_lock ) ;
ubifs_request_bg_commit ( c ) ;
mutex_unlock ( & c - > umount_mutex ) ;
return - 1 ;
}
mutex_unlock ( & c - > umount_mutex ) ;
}
}
spin_unlock ( & ubifs_infos_lock ) ;
return 0 ;
}
int ubifs_shrinker ( int nr , gfp_t gfp_mask )
{
int freed , contention = 0 ;
long clean_zn_cnt = atomic_long_read ( & ubifs_clean_zn_cnt ) ;
if ( nr = = 0 )
return clean_zn_cnt ;
if ( ! clean_zn_cnt ) {
/*
* No clean znodes , nothing to reap . All we can do in this case
* is to kick background threads to start commit , which will
* probably make clean znodes which , in turn , will be freeable .
* And we return - 1 which means will make VM call us again
* later .
*/
dbg_tnc ( " no clean znodes, kick a thread " ) ;
return kick_a_thread ( ) ;
}
freed = shrink_tnc_trees ( nr , OLD_ZNODE_AGE , & contention ) ;
if ( freed > = nr )
goto out ;
dbg_tnc ( " not enough old znodes, try to free young ones " ) ;
freed + = shrink_tnc_trees ( nr - freed , YOUNG_ZNODE_AGE , & contention ) ;
if ( freed > = nr )
goto out ;
dbg_tnc ( " not enough young znodes, free all " ) ;
freed + = shrink_tnc_trees ( nr - freed , 0 , & contention ) ;
if ( ! freed & & contention ) {
dbg_tnc ( " freed nothing, but contention " ) ;
return - 1 ;
}
out :
dbg_tnc ( " %d znodes were freed, requested %d " , freed , nr ) ;
return freed ;
}