2005-04-17 02:20:36 +04:00
/*
2006-09-28 04:52:15 +04:00
* Copyright ( c ) 2000 - 2006 Silicon Graphics , Inc .
2005-11-02 06:58:39 +03:00
* All Rights Reserved .
2005-04-17 02:20:36 +04:00
*
2005-11-02 06:58:39 +03:00
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License as
2005-04-17 02:20:36 +04:00
* published by the Free Software Foundation .
*
2005-11-02 06:58:39 +03:00
* This program is distributed in the hope that it would be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
2005-04-17 02:20:36 +04:00
*
2005-11-02 06:58:39 +03:00
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write the Free Software Foundation ,
* Inc . , 51 Franklin St , Fifth Floor , Boston , MA 02110 - 1301 USA
2005-04-17 02:20:36 +04:00
*/
2006-11-11 10:03:49 +03:00
# include "xfs.h"
2005-04-17 02:20:36 +04:00
# include <linux/stddef.h>
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/pagemap.h>
# include <linux/init.h>
# include <linux/vmalloc.h>
# include <linux/bio.h>
# include <linux/sysctl.h>
# include <linux/proc_fs.h>
# include <linux/workqueue.h>
# include <linux/percpu.h>
# include <linux/blkdev.h>
# include <linux/hash.h>
2005-09-05 02:34:18 +04:00
# include <linux/kthread.h>
2006-03-22 11:09:12 +03:00
# include <linux/migrate.h>
2006-10-20 10:28:16 +04:00
# include <linux/backing-dev.h>
2006-12-07 07:34:23 +03:00
# include <linux/freezer.h>
2005-04-17 02:20:36 +04:00
2007-02-10 10:34:56 +03:00
static kmem_zone_t * xfs_buf_zone ;
static kmem_shaker_t xfs_buf_shake ;
2006-01-11 07:37:58 +03:00
STATIC int xfsbufd ( void * ) ;
2005-10-21 11:20:48 +04:00
STATIC int xfsbufd_wakeup ( int , gfp_t ) ;
2006-01-11 07:39:08 +03:00
STATIC void xfs_buf_delwri_queue ( xfs_buf_t * , int ) ;
2005-06-21 09:14:01 +04:00
2007-02-10 10:34:56 +03:00
static struct workqueue_struct * xfslogd_workqueue ;
2005-09-02 10:58:49 +04:00
struct workqueue_struct * xfsdatad_workqueue ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
# ifdef XFS_BUF_TRACE
2005-04-17 02:20:36 +04:00
void
2006-01-11 07:39:08 +03:00
xfs_buf_trace (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
char * id ,
void * data ,
void * ra )
{
2006-01-11 07:39:08 +03:00
ktrace_enter ( xfs_buf_trace_buf ,
bp , id ,
( void * ) ( unsigned long ) bp - > b_flags ,
( void * ) ( unsigned long ) bp - > b_hold . counter ,
( void * ) ( unsigned long ) bp - > b_sema . count . counter ,
2005-04-17 02:20:36 +04:00
( void * ) current ,
data , ra ,
2006-01-11 07:39:08 +03:00
( void * ) ( unsigned long ) ( ( bp - > b_file_offset > > 32 ) & 0xffffffff ) ,
( void * ) ( unsigned long ) ( bp - > b_file_offset & 0xffffffff ) ,
( void * ) ( unsigned long ) bp - > b_buffer_length ,
2005-04-17 02:20:36 +04:00
NULL , NULL , NULL , NULL , NULL ) ;
}
2006-01-11 07:39:08 +03:00
ktrace_t * xfs_buf_trace_buf ;
# define XFS_BUF_TRACE_SIZE 4096
# define XB_TRACE(bp, id, data) \
xfs_buf_trace ( bp , id , ( void * ) data , ( void * ) __builtin_return_address ( 0 ) )
2005-04-17 02:20:36 +04:00
# else
2006-01-11 07:39:08 +03:00
# define XB_TRACE(bp, id, data) do { } while (0)
2005-04-17 02:20:36 +04:00
# endif
2006-01-11 07:39:08 +03:00
# ifdef XFS_BUF_LOCK_TRACKING
# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
# define XB_GET_OWNER(bp) ((bp)->b_last_holder)
2005-04-17 02:20:36 +04:00
# else
2006-01-11 07:39:08 +03:00
# define XB_SET_OWNER(bp) do { } while (0)
# define XB_CLEAR_OWNER(bp) do { } while (0)
# define XB_GET_OWNER(bp) do { } while (0)
2005-04-17 02:20:36 +04:00
# endif
2006-01-11 07:39:08 +03:00
# define xb_to_gfp(flags) \
( ( ( ( flags ) & XBF_READ_AHEAD ) ? __GFP_NORETRY : \
( ( flags ) & XBF_DONT_BLOCK ) ? GFP_NOFS : GFP_KERNEL ) | __GFP_NOWARN )
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
# define xb_to_km(flags) \
( ( ( flags ) & XBF_DONT_BLOCK ) ? KM_NOFS : KM_SLEEP )
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
/*
 * Allocate / free an xfs_buf_t from the buffer zone (xfs_buf_zone).
 *
 * Both macros expand to a single expression WITHOUT a trailing semicolon;
 * the caller supplies the terminating ';'.  This keeps them safe to use as
 * the only statement of an unbraced if/else arm.
 */
#define xfs_buf_allocate(flags) \
	kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
#define xfs_buf_deallocate(bp) \
	kmem_zone_free(xfs_buf_zone, (bp))
2005-04-17 02:20:36 +04:00
/*
 *	Page Region interfaces.
 *
 *	For pages in filesystems where the blocksize is smaller than the
 *	pagesize, we use the page->private field (long) to hold a bitmap
 *	of uptodate regions within the page.
 *
 *	Each such region is "bytes per page / bits per long" bytes long.
 *
 *	NBPPR  == number-of-bytes-per-page-region
 *	BTOPR  == bytes-to-page-region (rounded up)
 *	BTOPRT == bytes-to-page-region-truncated (rounded down)
 */
# if (BITS_PER_LONG == 32)
# define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
# elif (BITS_PER_LONG == 64)
# define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
# else
# error BITS_PER_LONG must be 32 or 64
# endif
# define NBPPR (PAGE_CACHE_SIZE / BITS_PER_LONG)
# define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
# define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
/*
 * Build the page-region bitmask for the byte range starting at 'offset'
 * and spanning 'length' bytes within a single page.
 *
 * 'first' is the region index of 'offset' rounded up (BTOPR), 'final' is
 * the region index of the last byte rounded down (BTOPRT); 'first' is
 * clamped so it never exceeds 'final'.  The mask is produced by shifting an
 * all-ones word left and then right by amounts derived from those indices.
 *
 * NOTE(review): when (final - first) == 0 or final == 0 the shift count
 * equals BITS_PER_LONG, which C leaves undefined - confirm the intended
 * result for those inputs before relying on it.
 */
STATIC unsigned long
page_region_mask(
	size_t		offset,
	size_t		length)
{
	unsigned long	region_bits = ~0UL;
	int		first = BTOPR(offset);
	int		final = BTOPRT(offset + length - 1);

	if (first > final)
		first = final;

	region_bits <<= BITS_PER_LONG - (final - first);
	region_bits >>= BITS_PER_LONG - (final);

	ASSERT(offset + length <= PAGE_CACHE_SIZE);
	ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);

	return region_bits;
}
2007-02-10 10:34:56 +03:00
STATIC_INLINE void
2005-04-17 02:20:36 +04:00
set_page_region (
struct page * page ,
size_t offset ,
size_t length )
{
[PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with
a many-threaded application which concurrently initializes different parts of
a large anonymous area.
This patch corrects that, by using a separate spinlock per page table page, to
guard the page table entries in that page, instead of using the mm's single
page_table_lock. (But even then, page_table_lock is still used to guard page
table allocation, and anon_vma allocation.)
In this implementation, the spinlock is tucked inside the struct page of the
page table page: with a BUILD_BUG_ON in case it overflows - which it would in
the case of 32-bit PA-RISC with spinlock debugging enabled.
Splitting the lock is not quite for free: another cacheline access. Ideally,
I suppose we would use split ptlock only for multi-threaded processes on
multi-cpu machines; but deciding that dynamically would have its own costs.
So for now enable it by config, at some number of cpus - since the Kconfig
language doesn't support inequalities, let preprocessor compare that with
NR_CPUS. But I don't think it's worth being user-configurable: for good
testing of both split and unsplit configs, split now at 4 cpus, and perhaps
change that to 8 later.
There is a benefit even for singly threaded processes: kswapd can be attacking
one part of the mm while another part is busy faulting.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-10-30 04:16:40 +03:00
set_page_private ( page ,
page_private ( page ) | page_region_mask ( offset , length ) ) ;
if ( page_private ( page ) = = ~ 0UL )
2005-04-17 02:20:36 +04:00
SetPageUptodate ( page ) ;
}
2007-02-10 10:34:56 +03:00
STATIC_INLINE int
2005-04-17 02:20:36 +04:00
test_page_region (
struct page * page ,
size_t offset ,
size_t length )
{
unsigned long mask = page_region_mask ( offset , length ) ;
[PATCH] mm: split page table lock
Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with
a many-threaded application which concurrently initializes different parts of
a large anonymous area.
This patch corrects that, by using a separate spinlock per page table page, to
guard the page table entries in that page, instead of using the mm's single
page_table_lock. (But even then, page_table_lock is still used to guard page
table allocation, and anon_vma allocation.)
In this implementation, the spinlock is tucked inside the struct page of the
page table page: with a BUILD_BUG_ON in case it overflows - which it would in
the case of 32-bit PA-RISC with spinlock debugging enabled.
Splitting the lock is not quite for free: another cacheline access. Ideally,
I suppose we would use split ptlock only for multi-threaded processes on
multi-cpu machines; but deciding that dynamically would have its own costs.
So for now enable it by config, at some number of cpus - since the Kconfig
language doesn't support inequalities, let preprocessor compare that with
NR_CPUS. But I don't think it's worth being user-configurable: for good
testing of both split and unsplit configs, split now at 4 cpus, and perhaps
change that to 8 later.
There is a benefit even for singly threaded processes: kswapd can be attacking
one part of the mm while another part is busy faulting.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-10-30 04:16:40 +03:00
return ( mask & & ( page_private ( page ) & mask ) = = mask ) ;
2005-04-17 02:20:36 +04:00
}
/*
 *	Mapping of multi-page buffers into contiguous virtual space
 *
 *	Addresses waiting to be vunmap()ed are kept on a singly linked list:
 *	as_free_head is the list head, as_list_len the number of queued
 *	entries, and as_lock is taken around updates to both.
 */
typedef struct a_list {
	void		*vm_addr;	/* address to hand to vunmap() */
	struct a_list	*next;		/* next queued entry, or NULL */
} a_list_t;

static a_list_t		*as_free_head;
static int		as_list_len;
static DEFINE_SPINLOCK(as_lock);
2005-04-17 02:20:36 +04:00
/*
2006-01-11 07:39:08 +03:00
* Try to batch vunmaps because they are costly .
2005-04-17 02:20:36 +04:00
*/
STATIC void
free_address (
void * addr )
{
a_list_t * aentry ;
2006-04-11 09:53:27 +04:00
aentry = kmalloc ( sizeof ( a_list_t ) , GFP_NOWAIT ) ;
2005-04-17 02:20:36 +04:00
if ( likely ( aentry ) ) {
spin_lock ( & as_lock ) ;
aentry - > next = as_free_head ;
aentry - > vm_addr = addr ;
as_free_head = aentry ;
as_list_len + + ;
spin_unlock ( & as_lock ) ;
} else {
vunmap ( addr ) ;
}
}
STATIC void
purge_addresses ( void )
{
a_list_t * aentry , * old ;
if ( as_free_head = = NULL )
return ;
spin_lock ( & as_lock ) ;
aentry = as_free_head ;
as_free_head = NULL ;
as_list_len = 0 ;
spin_unlock ( & as_lock ) ;
while ( ( old = aentry ) ! = NULL ) {
vunmap ( aentry - > vm_addr ) ;
aentry = aentry - > next ;
kfree ( old ) ;
}
}
/*
2006-01-11 07:39:08 +03:00
* Internal xfs_buf_t object manipulation
2005-04-17 02:20:36 +04:00
*/
STATIC void
2006-01-11 07:39:08 +03:00
_xfs_buf_initialize (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
xfs_buftarg_t * target ,
2006-01-11 12:50:22 +03:00
xfs_off_t range_base ,
2005-04-17 02:20:36 +04:00
size_t range_length ,
2006-01-11 07:39:08 +03:00
xfs_buf_flags_t flags )
2005-04-17 02:20:36 +04:00
{
/*
2006-01-11 07:39:08 +03:00
* We don ' t want certain flags to appear in b_flags .
2005-04-17 02:20:36 +04:00
*/
2006-01-11 07:39:08 +03:00
flags & = ~ ( XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK | XBF_READ_AHEAD ) ;
memset ( bp , 0 , sizeof ( xfs_buf_t ) ) ;
atomic_set ( & bp - > b_hold , 1 ) ;
init_MUTEX_LOCKED ( & bp - > b_iodonesema ) ;
INIT_LIST_HEAD ( & bp - > b_list ) ;
INIT_LIST_HEAD ( & bp - > b_hash_list ) ;
init_MUTEX_LOCKED ( & bp - > b_sema ) ; /* held, no waiters */
XB_SET_OWNER ( bp ) ;
bp - > b_target = target ;
bp - > b_file_offset = range_base ;
2005-04-17 02:20:36 +04:00
/*
* Set buffer_length and count_desired to the same value initially .
* I / O routines should use count_desired , which will be the same in
* most cases but may be reset ( e . g . XFS recovery ) .
*/
2006-01-11 07:39:08 +03:00
bp - > b_buffer_length = bp - > b_count_desired = range_length ;
bp - > b_flags = flags ;
bp - > b_bn = XFS_BUF_DADDR_NULL ;
atomic_set ( & bp - > b_pin_count , 0 ) ;
init_waitqueue_head ( & bp - > b_waiters ) ;
XFS_STATS_INC ( xb_create ) ;
XB_TRACE ( bp , " initialize " , target ) ;
2005-04-17 02:20:36 +04:00
}
/*
2006-01-11 07:39:08 +03:00
* Allocate a page array capable of holding a specified number
* of pages , and point the page buf at it .
2005-04-17 02:20:36 +04:00
*/
STATIC int
2006-01-11 07:39:08 +03:00
_xfs_buf_get_pages (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
int page_count ,
2006-01-11 07:39:08 +03:00
xfs_buf_flags_t flags )
2005-04-17 02:20:36 +04:00
{
/* Make sure that we have a page list */
2006-01-11 07:39:08 +03:00
if ( bp - > b_pages = = NULL ) {
bp - > b_offset = xfs_buf_poff ( bp - > b_file_offset ) ;
bp - > b_page_count = page_count ;
if ( page_count < = XB_PAGES ) {
bp - > b_pages = bp - > b_page_array ;
2005-04-17 02:20:36 +04:00
} else {
2006-01-11 07:39:08 +03:00
bp - > b_pages = kmem_alloc ( sizeof ( struct page * ) *
page_count , xb_to_km ( flags ) ) ;
if ( bp - > b_pages = = NULL )
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
}
2006-01-11 07:39:08 +03:00
memset ( bp - > b_pages , 0 , sizeof ( struct page * ) * page_count ) ;
2005-04-17 02:20:36 +04:00
}
return 0 ;
}
/*
2006-01-11 07:39:08 +03:00
* Frees b_pages if it was allocated .
2005-04-17 02:20:36 +04:00
*/
STATIC void
2006-01-11 07:39:08 +03:00
_xfs_buf_free_pages (
2005-04-17 02:20:36 +04:00
xfs_buf_t * bp )
{
2006-01-11 07:39:08 +03:00
if ( bp - > b_pages ! = bp - > b_page_array ) {
kmem_free ( bp - > b_pages ,
bp - > b_page_count * sizeof ( struct page * ) ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
* Releases the specified buffer .
*
* The modification state of any associated pages is left unchanged .
2006-01-11 07:39:08 +03:00
* The buffer most not be on any hash - use xfs_buf_rele instead for
2005-04-17 02:20:36 +04:00
* hashed and refcounted buffers
*/
void
2006-01-11 07:39:08 +03:00
xfs_buf_free (
2005-04-17 02:20:36 +04:00
xfs_buf_t * bp )
{
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " free " , 0 ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
ASSERT ( list_empty ( & bp - > b_hash_list ) ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
if ( bp - > b_flags & _XBF_PAGE_CACHE ) {
2005-04-17 02:20:36 +04:00
uint i ;
2006-01-11 07:39:08 +03:00
if ( ( bp - > b_flags & XBF_MAPPED ) & & ( bp - > b_page_count > 1 ) )
free_address ( bp - > b_addr - bp - > b_offset ) ;
2005-04-17 02:20:36 +04:00
2006-09-28 05:03:13 +04:00
for ( i = 0 ; i < bp - > b_page_count ; i + + ) {
struct page * page = bp - > b_pages [ i ] ;
ASSERT ( ! PagePrivate ( page ) ) ;
page_cache_release ( page ) ;
}
2006-01-11 07:39:08 +03:00
_xfs_buf_free_pages ( bp ) ;
} else if ( bp - > b_flags & _XBF_KMEM_ALLOC ) {
2005-04-17 02:20:36 +04:00
/*
2006-01-11 07:39:08 +03:00
* XXX ( hch ) : bp - > b_count_desired might be incorrect ( see
* xfs_buf_associate_memory for details ) , but fortunately
2005-04-17 02:20:36 +04:00
* the Linux version of kmem_free ignores the len argument . .
*/
2006-01-11 07:39:08 +03:00
kmem_free ( bp - > b_addr , bp - > b_count_desired ) ;
_xfs_buf_free_pages ( bp ) ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
xfs_buf_deallocate ( bp ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Finds all pages for buffer in question and builds it ' s page list .
*/
STATIC int
2006-01-11 07:39:08 +03:00
_xfs_buf_lookup_pages (
2005-04-17 02:20:36 +04:00
xfs_buf_t * bp ,
uint flags )
{
2006-01-11 07:39:08 +03:00
struct address_space * mapping = bp - > b_target - > bt_mapping ;
size_t blocksize = bp - > b_target - > bt_bsize ;
size_t size = bp - > b_count_desired ;
2005-04-17 02:20:36 +04:00
size_t nbytes , offset ;
2006-01-11 07:39:08 +03:00
gfp_t gfp_mask = xb_to_gfp ( flags ) ;
2005-04-17 02:20:36 +04:00
unsigned short page_count , i ;
pgoff_t first ;
2006-01-11 12:50:22 +03:00
xfs_off_t end ;
2005-04-17 02:20:36 +04:00
int error ;
2006-01-11 07:39:08 +03:00
end = bp - > b_file_offset + bp - > b_buffer_length ;
page_count = xfs_buf_btoc ( end ) - xfs_buf_btoct ( bp - > b_file_offset ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
error = _xfs_buf_get_pages ( bp , page_count , flags ) ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( error ) )
return error ;
2006-01-11 07:39:08 +03:00
bp - > b_flags | = _XBF_PAGE_CACHE ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
offset = bp - > b_offset ;
first = bp - > b_file_offset > > PAGE_CACHE_SHIFT ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
for ( i = 0 ; i < bp - > b_page_count ; i + + ) {
2005-04-17 02:20:36 +04:00
struct page * page ;
uint retries = 0 ;
retry :
page = find_or_create_page ( mapping , first + i , gfp_mask ) ;
if ( unlikely ( page = = NULL ) ) {
2006-01-11 07:39:08 +03:00
if ( flags & XBF_READ_AHEAD ) {
bp - > b_page_count = i ;
for ( i = 0 ; i < bp - > b_page_count ; i + + )
unlock_page ( bp - > b_pages [ i ] ) ;
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
}
/*
* This could deadlock .
*
* But until all the XFS lowlevel code is revamped to
* handle buffer allocation failures we can ' t do much .
*/
if ( ! ( + + retries % 100 ) )
printk ( KERN_ERR
" XFS: possible memory allocation "
" deadlock in %s (mode:0x%x) \n " ,
__FUNCTION__ , gfp_mask ) ;
2006-01-11 07:39:08 +03:00
XFS_STATS_INC ( xb_page_retries ) ;
2005-06-21 09:14:01 +04:00
xfsbufd_wakeup ( 0 , gfp_mask ) ;
2006-10-20 10:28:16 +04:00
congestion_wait ( WRITE , HZ / 50 ) ;
2005-04-17 02:20:36 +04:00
goto retry ;
}
2006-01-11 07:39:08 +03:00
XFS_STATS_INC ( xb_page_found ) ;
2005-04-17 02:20:36 +04:00
nbytes = min_t ( size_t , size , PAGE_CACHE_SIZE - offset ) ;
size - = nbytes ;
2006-09-28 05:03:13 +04:00
ASSERT ( ! PagePrivate ( page ) ) ;
2005-04-17 02:20:36 +04:00
if ( ! PageUptodate ( page ) ) {
page_count - - ;
if ( blocksize > = PAGE_CACHE_SIZE ) {
2006-01-11 07:39:08 +03:00
if ( flags & XBF_READ )
bp - > b_locked = 1 ;
2005-04-17 02:20:36 +04:00
} else if ( ! PagePrivate ( page ) ) {
if ( test_page_region ( page , offset , nbytes ) )
page_count + + ;
}
}
2006-01-11 07:39:08 +03:00
bp - > b_pages [ i ] = page ;
2005-04-17 02:20:36 +04:00
offset = 0 ;
}
2006-01-11 07:39:08 +03:00
if ( ! bp - > b_locked ) {
for ( i = 0 ; i < bp - > b_page_count ; i + + )
unlock_page ( bp - > b_pages [ i ] ) ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
if ( page_count = = bp - > b_page_count )
bp - > b_flags | = XBF_DONE ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " lookup_pages " , ( long ) page_count ) ;
2005-04-17 02:20:36 +04:00
return error ;
}
/*
* Map buffer into kernel address - space if nessecary .
*/
STATIC int
2006-01-11 07:39:08 +03:00
_xfs_buf_map_pages (
2005-04-17 02:20:36 +04:00
xfs_buf_t * bp ,
uint flags )
{
/* A single page buffer is always mappable */
2006-01-11 07:39:08 +03:00
if ( bp - > b_page_count = = 1 ) {
bp - > b_addr = page_address ( bp - > b_pages [ 0 ] ) + bp - > b_offset ;
bp - > b_flags | = XBF_MAPPED ;
} else if ( flags & XBF_MAPPED ) {
2005-04-17 02:20:36 +04:00
if ( as_list_len > 64 )
purge_addresses ( ) ;
2006-01-11 07:39:08 +03:00
bp - > b_addr = vmap ( bp - > b_pages , bp - > b_page_count ,
VM_MAP , PAGE_KERNEL ) ;
if ( unlikely ( bp - > b_addr = = NULL ) )
2005-04-17 02:20:36 +04:00
return - ENOMEM ;
2006-01-11 07:39:08 +03:00
bp - > b_addr + = bp - > b_offset ;
bp - > b_flags | = XBF_MAPPED ;
2005-04-17 02:20:36 +04:00
}
return 0 ;
}
/*
* Finding and Reading Buffers
*/
/*
2006-01-11 07:39:08 +03:00
* Look up , and creates if absent , a lockable buffer for
2005-04-17 02:20:36 +04:00
* a given range of an inode . The buffer is returned
* locked . If other overlapping buffers exist , they are
* released before the new buffer is created and locked ,
* which may imply that this call will block until those buffers
* are unlocked . No I / O is implied by this call .
*/
xfs_buf_t *
2006-01-11 07:39:08 +03:00
_xfs_buf_find (
2005-04-17 02:20:36 +04:00
xfs_buftarg_t * btp , /* block device target */
2006-01-11 12:50:22 +03:00
xfs_off_t ioff , /* starting offset of range */
2005-04-17 02:20:36 +04:00
size_t isize , /* length of range */
2006-01-11 07:39:08 +03:00
xfs_buf_flags_t flags ,
xfs_buf_t * new_bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 12:50:22 +03:00
xfs_off_t range_base ;
2005-04-17 02:20:36 +04:00
size_t range_length ;
xfs_bufhash_t * hash ;
2006-01-11 07:39:08 +03:00
xfs_buf_t * bp , * n ;
2005-04-17 02:20:36 +04:00
range_base = ( ioff < < BBSHIFT ) ;
range_length = ( isize < < BBSHIFT ) ;
/* Check for IOs smaller than the sector size / not sector aligned */
2006-01-11 07:39:08 +03:00
ASSERT ( ! ( range_length < ( 1 < < btp - > bt_sshift ) ) ) ;
2006-01-11 12:50:22 +03:00
ASSERT ( ! ( range_base & ( xfs_off_t ) btp - > bt_smask ) ) ;
2005-04-17 02:20:36 +04:00
hash = & btp - > bt_hash [ hash_long ( ( unsigned long ) ioff , btp - > bt_hashshift ) ] ;
spin_lock ( & hash - > bh_lock ) ;
2006-01-11 07:39:08 +03:00
list_for_each_entry_safe ( bp , n , & hash - > bh_list , b_hash_list ) {
ASSERT ( btp = = bp - > b_target ) ;
if ( bp - > b_file_offset = = range_base & &
bp - > b_buffer_length = = range_length ) {
2005-04-17 02:20:36 +04:00
/*
2006-01-11 07:39:08 +03:00
* If we look at something , bring it to the
2005-04-17 02:20:36 +04:00
* front of the list for next time .
*/
2006-01-11 07:39:08 +03:00
atomic_inc ( & bp - > b_hold ) ;
list_move ( & bp - > b_hash_list , & hash - > bh_list ) ;
2005-04-17 02:20:36 +04:00
goto found ;
}
}
/* No match found */
2006-01-11 07:39:08 +03:00
if ( new_bp ) {
_xfs_buf_initialize ( new_bp , btp , range_base ,
2005-04-17 02:20:36 +04:00
range_length , flags ) ;
2006-01-11 07:39:08 +03:00
new_bp - > b_hash = hash ;
list_add ( & new_bp - > b_hash_list , & hash - > bh_list ) ;
2005-04-17 02:20:36 +04:00
} else {
2006-01-11 07:39:08 +03:00
XFS_STATS_INC ( xb_miss_locked ) ;
2005-04-17 02:20:36 +04:00
}
spin_unlock ( & hash - > bh_lock ) ;
2006-01-11 07:39:08 +03:00
return new_bp ;
2005-04-17 02:20:36 +04:00
found :
spin_unlock ( & hash - > bh_lock ) ;
/* Attempt to get the semaphore without sleeping,
* if this does not work then we need to drop the
* spinlock and do a hard attempt on the semaphore .
*/
2006-01-11 07:39:08 +03:00
if ( down_trylock ( & bp - > b_sema ) ) {
if ( ! ( flags & XBF_TRYLOCK ) ) {
2005-04-17 02:20:36 +04:00
/* wait for buffer ownership */
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " get_lock " , 0 ) ;
xfs_buf_lock ( bp ) ;
XFS_STATS_INC ( xb_get_locked_waited ) ;
2005-04-17 02:20:36 +04:00
} else {
/* We asked for a trylock and failed, no need
* to look at file offset and length here , we
2006-01-11 07:39:08 +03:00
* know that this buffer at least overlaps our
* buffer and is locked , therefore our buffer
* either does not exist , or is this buffer .
2005-04-17 02:20:36 +04:00
*/
2006-01-11 07:39:08 +03:00
xfs_buf_rele ( bp ) ;
XFS_STATS_INC ( xb_busy_locked ) ;
return NULL ;
2005-04-17 02:20:36 +04:00
}
} else {
/* trylock worked */
2006-01-11 07:39:08 +03:00
XB_SET_OWNER ( bp ) ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
if ( bp - > b_flags & XBF_STALE ) {
ASSERT ( ( bp - > b_flags & _XBF_DELWRI_Q ) = = 0 ) ;
bp - > b_flags & = XBF_MAPPED ;
2005-09-05 02:33:35 +04:00
}
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " got_lock " , 0 ) ;
XFS_STATS_INC ( xb_get_locked ) ;
return bp ;
2005-04-17 02:20:36 +04:00
}
/*
2006-01-11 07:39:08 +03:00
* Assembles a buffer covering the specified range .
2005-04-17 02:20:36 +04:00
* Storage in memory for all portions of the buffer will be allocated ,
* although backing storage may not be .
*/
xfs_buf_t *
2006-01-11 07:39:08 +03:00
xfs_buf_get_flags (
2005-04-17 02:20:36 +04:00
xfs_buftarg_t * target , /* target for buffer */
2006-01-11 12:50:22 +03:00
xfs_off_t ioff , /* starting offset of range */
2005-04-17 02:20:36 +04:00
size_t isize , /* length of range */
2006-01-11 07:39:08 +03:00
xfs_buf_flags_t flags )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
xfs_buf_t * bp , * new_bp ;
2005-04-17 02:20:36 +04:00
int error = 0 , i ;
2006-01-11 07:39:08 +03:00
new_bp = xfs_buf_allocate ( flags ) ;
if ( unlikely ( ! new_bp ) )
2005-04-17 02:20:36 +04:00
return NULL ;
2006-01-11 07:39:08 +03:00
bp = _xfs_buf_find ( target , ioff , isize , flags , new_bp ) ;
if ( bp = = new_bp ) {
error = _xfs_buf_lookup_pages ( bp , flags ) ;
2005-04-17 02:20:36 +04:00
if ( error )
goto no_buffer ;
} else {
2006-01-11 07:39:08 +03:00
xfs_buf_deallocate ( new_bp ) ;
if ( unlikely ( bp = = NULL ) )
2005-04-17 02:20:36 +04:00
return NULL ;
}
2006-01-11 07:39:08 +03:00
for ( i = 0 ; i < bp - > b_page_count ; i + + )
mark_page_accessed ( bp - > b_pages [ i ] ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
if ( ! ( bp - > b_flags & XBF_MAPPED ) ) {
error = _xfs_buf_map_pages ( bp , flags ) ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( error ) ) {
printk ( KERN_WARNING " %s: failed to map pages \n " ,
__FUNCTION__ ) ;
goto no_buffer ;
}
}
2006-01-11 07:39:08 +03:00
XFS_STATS_INC ( xb_get ) ;
2005-04-17 02:20:36 +04:00
/*
* Always fill in the block number now , the mapped cases can do
* their own overlay of this later .
*/
2006-01-11 07:39:08 +03:00
bp - > b_bn = ioff ;
bp - > b_count_desired = bp - > b_buffer_length ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " get " , ( unsigned long ) flags ) ;
return bp ;
2005-04-17 02:20:36 +04:00
no_buffer :
2006-01-11 07:39:08 +03:00
if ( flags & ( XBF_LOCK | XBF_TRYLOCK ) )
xfs_buf_unlock ( bp ) ;
xfs_buf_rele ( bp ) ;
2005-04-17 02:20:36 +04:00
return NULL ;
}
xfs_buf_t *
xfs_buf_read_flags (
xfs_buftarg_t * target ,
2006-01-11 12:50:22 +03:00
xfs_off_t ioff ,
2005-04-17 02:20:36 +04:00
size_t isize ,
2006-01-11 07:39:08 +03:00
xfs_buf_flags_t flags )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
xfs_buf_t * bp ;
flags | = XBF_READ ;
bp = xfs_buf_get_flags ( target , ioff , isize , flags ) ;
if ( bp ) {
if ( ! XFS_BUF_ISDONE ( bp ) ) {
XB_TRACE ( bp , " read " , ( unsigned long ) flags ) ;
XFS_STATS_INC ( xb_get_read ) ;
xfs_buf_iostart ( bp , flags ) ;
} else if ( flags & XBF_ASYNC ) {
XB_TRACE ( bp , " read_async " , ( unsigned long ) flags ) ;
2005-04-17 02:20:36 +04:00
/*
* Read ahead call which is already satisfied ,
* drop the buffer
*/
goto no_buffer ;
} else {
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " read_done " , ( unsigned long ) flags ) ;
2005-04-17 02:20:36 +04:00
/* We do not want read in the flags */
2006-01-11 07:39:08 +03:00
bp - > b_flags & = ~ XBF_READ ;
2005-04-17 02:20:36 +04:00
}
}
2006-01-11 07:39:08 +03:00
return bp ;
2005-04-17 02:20:36 +04:00
no_buffer :
2006-01-11 07:39:08 +03:00
if ( flags & ( XBF_LOCK | XBF_TRYLOCK ) )
xfs_buf_unlock ( bp ) ;
xfs_buf_rele ( bp ) ;
2005-04-17 02:20:36 +04:00
return NULL ;
}
/*
2006-01-11 07:39:08 +03:00
* If we are not low on memory then do the readahead in a deadlock
* safe manner .
2005-04-17 02:20:36 +04:00
*/
void
2006-01-11 07:39:08 +03:00
xfs_buf_readahead (
2005-04-17 02:20:36 +04:00
xfs_buftarg_t * target ,
2006-01-11 12:50:22 +03:00
xfs_off_t ioff ,
2005-04-17 02:20:36 +04:00
size_t isize ,
2006-01-11 07:39:08 +03:00
xfs_buf_flags_t flags )
2005-04-17 02:20:36 +04:00
{
struct backing_dev_info * bdi ;
2006-01-11 07:39:08 +03:00
bdi = target - > bt_mapping - > backing_dev_info ;
2005-04-17 02:20:36 +04:00
if ( bdi_read_congested ( bdi ) )
return ;
2006-01-11 07:39:08 +03:00
flags | = ( XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD ) ;
2005-04-17 02:20:36 +04:00
xfs_buf_read_flags ( target , ioff , isize , flags ) ;
}
xfs_buf_t *
2006-01-11 07:39:08 +03:00
xfs_buf_get_empty (
2005-04-17 02:20:36 +04:00
size_t len ,
xfs_buftarg_t * target )
{
2006-01-11 07:39:08 +03:00
xfs_buf_t * bp ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
bp = xfs_buf_allocate ( 0 ) ;
if ( bp )
_xfs_buf_initialize ( bp , target , 0 , len , 0 ) ;
return bp ;
2005-04-17 02:20:36 +04:00
}
static inline struct page *
mem_to_page (
void * addr )
{
if ( ( ( unsigned long ) addr < VMALLOC_START ) | |
( ( unsigned long ) addr > = VMALLOC_END ) ) {
return virt_to_page ( addr ) ;
} else {
return vmalloc_to_page ( addr ) ;
}
}
int
2006-01-11 07:39:08 +03:00
xfs_buf_associate_memory (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
void * mem ,
size_t len )
{
int rval ;
int i = 0 ;
size_t ptr ;
size_t end , end_cur ;
off_t offset ;
int page_count ;
page_count = PAGE_CACHE_ALIGN ( len ) > > PAGE_CACHE_SHIFT ;
offset = ( off_t ) mem - ( ( off_t ) mem & PAGE_CACHE_MASK ) ;
if ( offset & & ( len > PAGE_CACHE_SIZE ) )
page_count + + ;
/* Free any previous set of page pointers */
2006-01-11 07:39:08 +03:00
if ( bp - > b_pages )
_xfs_buf_free_pages ( bp ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
bp - > b_pages = NULL ;
bp - > b_addr = mem ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
rval = _xfs_buf_get_pages ( bp , page_count , 0 ) ;
2005-04-17 02:20:36 +04:00
if ( rval )
return rval ;
2006-01-11 07:39:08 +03:00
bp - > b_offset = offset ;
2005-04-17 02:20:36 +04:00
ptr = ( size_t ) mem & PAGE_CACHE_MASK ;
end = PAGE_CACHE_ALIGN ( ( size_t ) mem + len ) ;
end_cur = end ;
/* set up first page */
2006-01-11 07:39:08 +03:00
bp - > b_pages [ 0 ] = mem_to_page ( mem ) ;
2005-04-17 02:20:36 +04:00
ptr + = PAGE_CACHE_SIZE ;
2006-01-11 07:39:08 +03:00
bp - > b_page_count = + + i ;
2005-04-17 02:20:36 +04:00
while ( ptr < end ) {
2006-01-11 07:39:08 +03:00
bp - > b_pages [ i ] = mem_to_page ( ( void * ) ptr ) ;
bp - > b_page_count = + + i ;
2005-04-17 02:20:36 +04:00
ptr + = PAGE_CACHE_SIZE ;
}
2006-01-11 07:39:08 +03:00
bp - > b_locked = 0 ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
bp - > b_count_desired = bp - > b_buffer_length = len ;
bp - > b_flags | = XBF_MAPPED ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
xfs_buf_t *
2006-01-11 07:39:08 +03:00
xfs_buf_get_noaddr (
2005-04-17 02:20:36 +04:00
size_t len ,
xfs_buftarg_t * target )
{
size_t malloc_len = len ;
xfs_buf_t * bp ;
void * data ;
int error ;
2006-01-11 07:39:08 +03:00
bp = xfs_buf_allocate ( 0 ) ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( bp = = NULL ) )
goto fail ;
2006-01-11 07:39:08 +03:00
_xfs_buf_initialize ( bp , target , 0 , len , 0 ) ;
2005-04-17 02:20:36 +04:00
try_again :
2006-09-28 05:03:05 +04:00
data = kmem_alloc ( malloc_len , KM_SLEEP | KM_MAYFAIL | KM_LARGE ) ;
2005-04-17 02:20:36 +04:00
if ( unlikely ( data = = NULL ) )
goto fail_free_buf ;
/* check whether alignment matches.. */
if ( ( __psunsigned_t ) data ! =
2006-01-11 07:39:08 +03:00
( ( __psunsigned_t ) data & ~ target - > bt_smask ) ) {
2005-04-17 02:20:36 +04:00
/* .. else double the size and try again */
kmem_free ( data , malloc_len ) ;
malloc_len < < = 1 ;
goto try_again ;
}
2006-01-11 07:39:08 +03:00
error = xfs_buf_associate_memory ( bp , data , len ) ;
2005-04-17 02:20:36 +04:00
if ( error )
goto fail_free_mem ;
2006-01-11 07:39:08 +03:00
bp - > b_flags | = _XBF_KMEM_ALLOC ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
xfs_buf_unlock ( bp ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " no_daddr " , data ) ;
2005-04-17 02:20:36 +04:00
return bp ;
fail_free_mem :
kmem_free ( data , malloc_len ) ;
fail_free_buf :
2006-01-11 07:39:08 +03:00
xfs_buf_free ( bp ) ;
2005-04-17 02:20:36 +04:00
fail :
return NULL ;
}
/*
* Increment reference count on buffer , to hold the buffer concurrently
* with another thread which may release ( free ) the buffer asynchronously .
* Must hold the buffer already to call this function .
*/
void
2006-01-11 07:39:08 +03:00
xfs_buf_hold (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
atomic_inc ( & bp - > b_hold ) ;
XB_TRACE ( bp , " hold " , 0 ) ;
2005-04-17 02:20:36 +04:00
}
/*
2006-01-11 07:39:08 +03:00
* Releases a hold on the specified buffer . If the
* the hold count is 1 , calls xfs_buf_free .
2005-04-17 02:20:36 +04:00
*/
void
2006-01-11 07:39:08 +03:00
xfs_buf_rele (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
xfs_bufhash_t * hash = bp - > b_hash ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " rele " , bp - > b_relse ) ;
2005-04-17 02:20:36 +04:00
2006-02-01 04:14:52 +03:00
if ( unlikely ( ! hash ) ) {
ASSERT ( ! bp - > b_relse ) ;
if ( atomic_dec_and_test ( & bp - > b_hold ) )
xfs_buf_free ( bp ) ;
return ;
}
2006-01-11 07:39:08 +03:00
if ( atomic_dec_and_lock ( & bp - > b_hold , & hash - > bh_lock ) ) {
if ( bp - > b_relse ) {
atomic_inc ( & bp - > b_hold ) ;
2005-04-17 02:20:36 +04:00
spin_unlock ( & hash - > bh_lock ) ;
2006-01-11 07:39:08 +03:00
( * ( bp - > b_relse ) ) ( bp ) ;
} else if ( bp - > b_flags & XBF_FS_MANAGED ) {
2005-04-17 02:20:36 +04:00
spin_unlock ( & hash - > bh_lock ) ;
} else {
2006-01-11 07:39:08 +03:00
ASSERT ( ! ( bp - > b_flags & ( XBF_DELWRI | _XBF_DELWRI_Q ) ) ) ;
list_del_init ( & bp - > b_hash_list ) ;
2005-04-17 02:20:36 +04:00
spin_unlock ( & hash - > bh_lock ) ;
2006-01-11 07:39:08 +03:00
xfs_buf_free ( bp ) ;
2005-04-17 02:20:36 +04:00
}
2005-09-05 02:33:35 +04:00
} else {
/*
* Catch reference count leaks
*/
2006-01-11 07:39:08 +03:00
ASSERT ( atomic_read ( & bp - > b_hold ) > = 0 ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
* Mutual exclusion on buffers . Locking model :
*
* Buffers associated with inodes for which buffer locking
* is not enabled are not protected by semaphores , and are
* assumed to be exclusively owned by the caller . There is a
* spinlock in the buffer , used by the caller when concurrent
* access is possible .
*/
/*
2006-01-11 07:39:08 +03:00
* Locks a buffer object , if it is not already locked .
* Note that this in no way locks the underlying pages , so it is only
* useful for synchronizing concurrent use of buffer objects , not for
* synchronizing independent access to the underlying pages .
2005-04-17 02:20:36 +04:00
*/
int
2006-01-11 07:39:08 +03:00
xfs_buf_cond_lock (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
int locked ;
2006-01-11 07:39:08 +03:00
locked = down_trylock ( & bp - > b_sema ) = = 0 ;
2005-04-17 02:20:36 +04:00
if ( locked ) {
2006-01-11 07:39:08 +03:00
XB_SET_OWNER ( bp ) ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " cond_lock " , ( long ) locked ) ;
return locked ? 0 : - EBUSY ;
2005-04-17 02:20:36 +04:00
}
# if defined(DEBUG) || defined(XFS_BLI_TRACE)
int
2006-01-11 07:39:08 +03:00
xfs_buf_lock_value (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
return atomic_read ( & bp - > b_sema . count ) ;
2005-04-17 02:20:36 +04:00
}
# endif
/*
2006-01-11 07:39:08 +03:00
* Locks a buffer object .
* Note that this in no way locks the underlying pages , so it is only
* useful for synchronizing concurrent use of buffer objects , not for
* synchronizing independent access to the underlying pages .
2005-04-17 02:20:36 +04:00
*/
2006-01-11 07:39:08 +03:00
void
xfs_buf_lock (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " lock " , 0 ) ;
if ( atomic_read ( & bp - > b_io_remaining ) )
blk_run_address_space ( bp - > b_target - > bt_mapping ) ;
down ( & bp - > b_sema ) ;
XB_SET_OWNER ( bp ) ;
XB_TRACE ( bp , " locked " , 0 ) ;
2005-04-17 02:20:36 +04:00
}
/*
2006-01-11 07:39:08 +03:00
* Releases the lock on the buffer object .
2005-09-05 02:33:35 +04:00
* If the buffer is marked delwri but is not queued , do so before we
2006-01-11 07:39:08 +03:00
* unlock the buffer as we need to set flags correctly . We also need to
2005-09-05 02:33:35 +04:00
* take a reference for the delwri queue because the unlocker is going to
* drop their ' s and they don ' t know we just queued it .
2005-04-17 02:20:36 +04:00
*/
void
2006-01-11 07:39:08 +03:00
xfs_buf_unlock (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
if ( ( bp - > b_flags & ( XBF_DELWRI | _XBF_DELWRI_Q ) ) = = XBF_DELWRI ) {
atomic_inc ( & bp - > b_hold ) ;
bp - > b_flags | = XBF_ASYNC ;
xfs_buf_delwri_queue ( bp , 0 ) ;
2005-09-05 02:33:35 +04:00
}
2006-01-11 07:39:08 +03:00
XB_CLEAR_OWNER ( bp ) ;
up ( & bp - > b_sema ) ;
XB_TRACE ( bp , " unlock " , 0 ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Pinning Buffer Storage in Memory
2006-01-11 07:39:08 +03:00
* Ensure that no attempt to force a buffer to disk will succeed .
2005-04-17 02:20:36 +04:00
*/
void
2006-01-11 07:39:08 +03:00
xfs_buf_pin (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
atomic_inc ( & bp - > b_pin_count ) ;
XB_TRACE ( bp , " pin " , ( long ) bp - > b_pin_count . counter ) ;
2005-04-17 02:20:36 +04:00
}
void
2006-01-11 07:39:08 +03:00
xfs_buf_unpin (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
if ( atomic_dec_and_test ( & bp - > b_pin_count ) )
wake_up_all ( & bp - > b_waiters ) ;
XB_TRACE ( bp , " unpin " , ( long ) bp - > b_pin_count . counter ) ;
2005-04-17 02:20:36 +04:00
}
int
2006-01-11 07:39:08 +03:00
xfs_buf_ispin (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
return atomic_read ( & bp - > b_pin_count ) ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
/*
 *	Sleep (uninterruptibly) until the buffer's pin count reaches zero.
 *	Returns immediately when the buffer is not pinned.
 */
STATIC void
xfs_buf_wait_unpin(
	xfs_buf_t		*bp)
{
	DECLARE_WAITQUEUE	(wait, current);

	if (atomic_read(&bp->b_pin_count) == 0)
		return;

	add_wait_queue(&bp->b_waiters, &wait);

	/*
	 * The task state is set before every re-check of the pin count so
	 * that a wakeup arriving between the check and schedule() is not lost.
	 */
	set_current_state(TASK_UNINTERRUPTIBLE);
	while (atomic_read(&bp->b_pin_count) != 0) {
		if (atomic_read(&bp->b_io_remaining))
			blk_run_address_space(bp->b_target->bt_mapping);
		schedule();
		set_current_state(TASK_UNINTERRUPTIBLE);
	}

	remove_wait_queue(&bp->b_waiters, &wait);
	set_current_state(TASK_RUNNING);
}
/*
* Buffer Utility Routines
*/
STATIC void
2006-01-11 07:39:08 +03:00
xfs_buf_iodone_work (
2006-11-22 17:57:56 +03:00
struct work_struct * work )
2005-04-17 02:20:36 +04:00
{
2006-11-22 17:57:56 +03:00
xfs_buf_t * bp =
container_of ( work , xfs_buf_t , b_iodone_work ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
if ( bp - > b_iodone )
( * ( bp - > b_iodone ) ) ( bp ) ;
else if ( bp - > b_flags & XBF_ASYNC )
2005-04-17 02:20:36 +04:00
xfs_buf_relse ( bp ) ;
}
void
2006-01-11 07:39:08 +03:00
xfs_buf_ioend (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
int schedule )
{
2006-01-11 07:39:08 +03:00
bp - > b_flags & = ~ ( XBF_READ | XBF_WRITE ) ;
if ( bp - > b_error = = 0 )
bp - > b_flags | = XBF_DONE ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " iodone " , bp - > b_iodone ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
if ( ( bp - > b_iodone ) | | ( bp - > b_flags & XBF_ASYNC ) ) {
2005-04-17 02:20:36 +04:00
if ( schedule ) {
2006-11-22 17:57:56 +03:00
INIT_WORK ( & bp - > b_iodone_work , xfs_buf_iodone_work ) ;
2006-01-11 07:39:08 +03:00
queue_work ( xfslogd_workqueue , & bp - > b_iodone_work ) ;
2005-04-17 02:20:36 +04:00
} else {
2006-11-22 17:57:56 +03:00
xfs_buf_iodone_work ( & bp - > b_iodone_work ) ;
2005-04-17 02:20:36 +04:00
}
} else {
2006-01-11 07:39:08 +03:00
up ( & bp - > b_iodonesema ) ;
2005-04-17 02:20:36 +04:00
}
}
void
2006-01-11 07:39:08 +03:00
xfs_buf_ioerror (
xfs_buf_t * bp ,
int error )
2005-04-17 02:20:36 +04:00
{
ASSERT ( error > = 0 & & error < = 0xffff ) ;
2006-01-11 07:39:08 +03:00
bp - > b_error = ( unsigned short ) error ;
XB_TRACE ( bp , " ioerror " , ( unsigned long ) error ) ;
2005-04-17 02:20:36 +04:00
}
/*
2006-01-11 07:39:08 +03:00
* Initiate I / O on a buffer , based on the flags supplied .
* The b_iodone routine in the buffer supplied will only be called
2005-04-17 02:20:36 +04:00
* when all of the subsidiary I / O requests , if any , have been completed .
*/
int
2006-01-11 07:39:08 +03:00
xfs_buf_iostart (
xfs_buf_t * bp ,
xfs_buf_flags_t flags )
2005-04-17 02:20:36 +04:00
{
int status = 0 ;
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " iostart " , ( unsigned long ) flags ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
if ( flags & XBF_DELWRI ) {
bp - > b_flags & = ~ ( XBF_READ | XBF_WRITE | XBF_ASYNC ) ;
bp - > b_flags | = flags & ( XBF_DELWRI | XBF_ASYNC ) ;
xfs_buf_delwri_queue ( bp , 1 ) ;
2005-04-17 02:20:36 +04:00
return status ;
}
2006-01-11 07:39:08 +03:00
bp - > b_flags & = ~ ( XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
XBF_READ_AHEAD | _XBF_RUN_QUEUES ) ;
bp - > b_flags | = flags & ( XBF_READ | XBF_WRITE | XBF_ASYNC | \
XBF_READ_AHEAD | _XBF_RUN_QUEUES ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
BUG_ON ( bp - > b_bn = = XFS_BUF_DADDR_NULL ) ;
2005-04-17 02:20:36 +04:00
/* For writes allow an alternate strategy routine to precede
* the actual I / O request ( which may not be issued at all in
* a shutdown situation , for example ) .
*/
2006-01-11 07:39:08 +03:00
status = ( flags & XBF_WRITE ) ?
xfs_buf_iostrategy ( bp ) : xfs_buf_iorequest ( bp ) ;
2005-04-17 02:20:36 +04:00
/* Wait for I/O if we are not an async request.
* Note : async I / O request completion will release the buffer ,
* and that can already be done by this point . So using the
* buffer pointer from here on , after async I / O , is invalid .
*/
2006-01-11 07:39:08 +03:00
if ( ! status & & ! ( flags & XBF_ASYNC ) )
status = xfs_buf_iowait ( bp ) ;
2005-04-17 02:20:36 +04:00
return status ;
}
2007-02-10 10:34:56 +03:00
STATIC_INLINE int
2006-01-11 07:39:08 +03:00
_xfs_buf_iolocked (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
ASSERT ( bp - > b_flags & ( XBF_READ | XBF_WRITE ) ) ;
if ( bp - > b_flags & XBF_READ )
return bp - > b_locked ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-02-10 10:34:56 +03:00
STATIC_INLINE void
2006-01-11 07:39:08 +03:00
_xfs_buf_ioend (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
int schedule )
{
2006-01-11 07:39:08 +03:00
if ( atomic_dec_and_test ( & bp - > b_io_remaining ) = = 1 ) {
bp - > b_locked = 0 ;
xfs_buf_ioend ( bp , schedule ) ;
2005-04-17 02:20:36 +04:00
}
}
/*
 * bio completion handler installed by _xfs_buf_ioapply() (bi_end_io).
 *
 * Returns 1 without doing anything while bio->bi_size is still non-zero.
 * Otherwise it records EIO on the buffer if the bio is not up to date,
 * updates the state of every page attached to the bio (walking the
 * bio_vec array from the last entry to the first), drops one
 * b_io_remaining count through _xfs_buf_ioend(), releases the bio and
 * returns 0.  The bytes_done and error arguments are not examined.
 */
STATIC int
2006-01-11 07:39:08 +03:00
xfs_buf_bio_end_io (
2005-04-17 02:20:36 +04:00
struct bio * bio ,
unsigned int bytes_done ,
int error )
{
2006-01-11 07:39:08 +03:00
xfs_buf_t * bp = ( xfs_buf_t * ) bio - > bi_private ;
unsigned int blocksize = bp - > b_target - > bt_bsize ;
2005-09-02 10:39:56 +04:00
/* Start at the last vector; the loop below walks backwards */
struct bio_vec * bvec = bio - > bi_io_vec + bio - > bi_vcnt - 1 ;
2005-04-17 02:20:36 +04:00
/* Bytes still outstanding on this bio: nothing to do yet */
if ( bio - > bi_size )
return 1 ;
if ( ! test_bit ( BIO_UPTODATE , & bio - > bi_flags ) )
2006-01-11 07:39:08 +03:00
bp - > b_error = EIO ;
2005-04-17 02:20:36 +04:00
2005-09-02 10:39:56 +04:00
do {
2005-04-17 02:20:36 +04:00
struct page * page = bvec - > bv_page ;
2006-09-28 05:03:13 +04:00
ASSERT ( ! PagePrivate ( page ) ) ;
2006-01-11 07:39:08 +03:00
/*
 * Error: a failed read leaves the page not up to date.
 * Success with block size >= page size: the whole page is valid.
 * Success with smaller blocks: only the region covered by this
 * vector is marked, and only for page cache backed buffers.
 */
if ( unlikely ( bp - > b_error ) ) {
if ( bp - > b_flags & XBF_READ )
2005-09-02 10:39:56 +04:00
ClearPageUptodate ( page ) ;
2006-01-11 07:39:08 +03:00
} else if ( blocksize > = PAGE_CACHE_SIZE ) {
2005-04-17 02:20:36 +04:00
SetPageUptodate ( page ) ;
} else if ( ! PagePrivate ( page ) & &
2006-01-11 07:39:08 +03:00
( bp - > b_flags & _XBF_PAGE_CACHE ) ) {
2005-04-17 02:20:36 +04:00
set_page_region ( page , bvec - > bv_offset , bvec - > bv_len ) ;
}
2005-09-02 10:39:56 +04:00
/* Step to the previous vector and prefetch its page flags for write */
if ( - - bvec > = bio - > bi_io_vec )
prefetchw ( & bvec - > bv_page - > flags ) ;
2006-01-11 07:39:08 +03:00
/* Unlock the page just processed when the buffer I/O holds page locks */
if ( _xfs_buf_iolocked ( bp ) ) {
2005-04-17 02:20:36 +04:00
unlock_page ( page ) ;
}
2005-09-02 10:39:56 +04:00
} while ( bvec > = bio - > bi_io_vec ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
/* schedule == 1: xfs_buf_ioend() defers callbacks to the workqueue */
_xfs_buf_ioend ( bp , 1 ) ;
2005-04-17 02:20:36 +04:00
bio_put ( bio ) ;
return 0 ;
}
STATIC void
2006-01-11 07:39:08 +03:00
_xfs_buf_ioapply (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
int i , rw , map_i , total_nr_pages , nr_pages ;
struct bio * bio ;
2006-01-11 07:39:08 +03:00
int offset = bp - > b_offset ;
int size = bp - > b_count_desired ;
sector_t sector = bp - > b_bn ;
unsigned int blocksize = bp - > b_target - > bt_bsize ;
int locking = _xfs_buf_iolocked ( bp ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
total_nr_pages = bp - > b_page_count ;
2005-04-17 02:20:36 +04:00
map_i = 0 ;
2006-01-11 07:39:08 +03:00
if ( bp - > b_flags & XBF_ORDERED ) {
ASSERT ( ! ( bp - > b_flags & XBF_READ ) ) ;
2005-11-02 02:26:59 +03:00
rw = WRITE_BARRIER ;
2006-09-28 05:01:57 +04:00
} else if ( bp - > b_flags & _XBF_RUN_QUEUES ) {
ASSERT ( ! ( bp - > b_flags & XBF_READ_AHEAD ) ) ;
bp - > b_flags & = ~ _XBF_RUN_QUEUES ;
rw = ( bp - > b_flags & XBF_WRITE ) ? WRITE_SYNC : READ_SYNC ;
} else {
rw = ( bp - > b_flags & XBF_WRITE ) ? WRITE :
( bp - > b_flags & XBF_READ_AHEAD ) ? READA : READ ;
2005-11-02 02:26:59 +03:00
}
2006-01-11 07:39:08 +03:00
/* Special code path for reading a sub page size buffer in --
2005-04-17 02:20:36 +04:00
* we populate up the whole page , and hence the other metadata
* in the same page . This optimization is only valid when the
2006-01-11 07:39:08 +03:00
* filesystem block size is not smaller than the page size .
2005-04-17 02:20:36 +04:00
*/
2006-01-11 07:39:08 +03:00
if ( ( bp - > b_buffer_length < PAGE_CACHE_SIZE ) & &
( bp - > b_flags & XBF_READ ) & & locking & &
( blocksize > = PAGE_CACHE_SIZE ) ) {
2005-04-17 02:20:36 +04:00
bio = bio_alloc ( GFP_NOIO , 1 ) ;
2006-01-11 07:39:08 +03:00
bio - > bi_bdev = bp - > b_target - > bt_bdev ;
2005-04-17 02:20:36 +04:00
bio - > bi_sector = sector - ( offset > > BBSHIFT ) ;
2006-01-11 07:39:08 +03:00
bio - > bi_end_io = xfs_buf_bio_end_io ;
bio - > bi_private = bp ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
bio_add_page ( bio , bp - > b_pages [ 0 ] , PAGE_CACHE_SIZE , 0 ) ;
2005-04-17 02:20:36 +04:00
size = 0 ;
2006-01-11 07:39:08 +03:00
atomic_inc ( & bp - > b_io_remaining ) ;
2005-04-17 02:20:36 +04:00
goto submit_io ;
}
/* Lock down the pages which we need to for the request */
2006-01-11 07:39:08 +03:00
if ( locking & & ( bp - > b_flags & XBF_WRITE ) & & ( bp - > b_locked = = 0 ) ) {
2005-04-17 02:20:36 +04:00
for ( i = 0 ; size ; i + + ) {
int nbytes = PAGE_CACHE_SIZE - offset ;
2006-01-11 07:39:08 +03:00
struct page * page = bp - > b_pages [ i ] ;
2005-04-17 02:20:36 +04:00
if ( nbytes > size )
nbytes = size ;
lock_page ( page ) ;
size - = nbytes ;
offset = 0 ;
}
2006-01-11 07:39:08 +03:00
offset = bp - > b_offset ;
size = bp - > b_count_desired ;
2005-04-17 02:20:36 +04:00
}
next_chunk :
2006-01-11 07:39:08 +03:00
atomic_inc ( & bp - > b_io_remaining ) ;
2005-04-17 02:20:36 +04:00
nr_pages = BIO_MAX_SECTORS > > ( PAGE_SHIFT - BBSHIFT ) ;
if ( nr_pages > total_nr_pages )
nr_pages = total_nr_pages ;
bio = bio_alloc ( GFP_NOIO , nr_pages ) ;
2006-01-11 07:39:08 +03:00
bio - > bi_bdev = bp - > b_target - > bt_bdev ;
2005-04-17 02:20:36 +04:00
bio - > bi_sector = sector ;
2006-01-11 07:39:08 +03:00
bio - > bi_end_io = xfs_buf_bio_end_io ;
bio - > bi_private = bp ;
2005-04-17 02:20:36 +04:00
for ( ; size & & nr_pages ; nr_pages - - , map_i + + ) {
2006-01-11 07:39:08 +03:00
int rbytes , nbytes = PAGE_CACHE_SIZE - offset ;
2005-04-17 02:20:36 +04:00
if ( nbytes > size )
nbytes = size ;
2006-01-11 07:39:08 +03:00
rbytes = bio_add_page ( bio , bp - > b_pages [ map_i ] , nbytes , offset ) ;
if ( rbytes < nbytes )
2005-04-17 02:20:36 +04:00
break ;
offset = 0 ;
sector + = nbytes > > BBSHIFT ;
size - = nbytes ;
total_nr_pages - - ;
}
submit_io :
if ( likely ( bio - > bi_size ) ) {
submit_bio ( rw , bio ) ;
if ( size )
goto next_chunk ;
} else {
bio_put ( bio ) ;
2006-01-11 07:39:08 +03:00
xfs_buf_ioerror ( bp , EIO ) ;
2005-04-17 02:20:36 +04:00
}
}
int
2006-01-11 07:39:08 +03:00
xfs_buf_iorequest (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " iorequest " , 0 ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
if ( bp - > b_flags & XBF_DELWRI ) {
xfs_buf_delwri_queue ( bp , 1 ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2006-01-11 07:39:08 +03:00
if ( bp - > b_flags & XBF_WRITE ) {
xfs_buf_wait_unpin ( bp ) ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
xfs_buf_hold ( bp ) ;
2005-04-17 02:20:36 +04:00
/* Set the count to 1 initially, this will stop an I/O
* completion callout which happens before we have started
2006-01-11 07:39:08 +03:00
* all the I / O from calling xfs_buf_ioend too early .
2005-04-17 02:20:36 +04:00
*/
2006-01-11 07:39:08 +03:00
atomic_set ( & bp - > b_io_remaining , 1 ) ;
_xfs_buf_ioapply ( bp ) ;
_xfs_buf_ioend ( bp , 0 ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
xfs_buf_rele ( bp ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
/*
2006-01-11 07:39:08 +03:00
* Waits for I / O to complete on the buffer supplied .
* It returns immediately if no I / O is pending .
* It returns the I / O error code , if any , or 0 if there was no error .
2005-04-17 02:20:36 +04:00
*/
int
2006-01-11 07:39:08 +03:00
xfs_buf_iowait (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " iowait " , 0 ) ;
if ( atomic_read ( & bp - > b_io_remaining ) )
blk_run_address_space ( bp - > b_target - > bt_mapping ) ;
down ( & bp - > b_iodonesema ) ;
XB_TRACE ( bp , " iowaited " , ( long ) bp - > b_error ) ;
return bp - > b_error ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
xfs_caddr_t
xfs_buf_offset (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
size_t offset )
{
struct page * page ;
2006-01-11 07:39:08 +03:00
if ( bp - > b_flags & XBF_MAPPED )
return XFS_BUF_PTR ( bp ) + offset ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
offset + = bp - > b_offset ;
page = bp - > b_pages [ offset > > PAGE_CACHE_SHIFT ] ;
return ( xfs_caddr_t ) page_address ( page ) + ( offset & ( PAGE_CACHE_SIZE - 1 ) ) ;
2005-04-17 02:20:36 +04:00
}
/*
* Move data into or out of a buffer .
*/
void
2006-01-11 07:39:08 +03:00
xfs_buf_iomove (
xfs_buf_t * bp , /* buffer to process */
2005-04-17 02:20:36 +04:00
size_t boff , /* starting buffer offset */
size_t bsize , /* length to copy */
caddr_t data , /* data address */
2006-01-11 07:39:08 +03:00
xfs_buf_rw_t mode ) /* read/write/zero flag */
2005-04-17 02:20:36 +04:00
{
size_t bend , cpoff , csize ;
struct page * page ;
bend = boff + bsize ;
while ( boff < bend ) {
2006-01-11 07:39:08 +03:00
page = bp - > b_pages [ xfs_buf_btoct ( boff + bp - > b_offset ) ] ;
cpoff = xfs_buf_poff ( boff + bp - > b_offset ) ;
2005-04-17 02:20:36 +04:00
csize = min_t ( size_t ,
2006-01-11 07:39:08 +03:00
PAGE_CACHE_SIZE - cpoff , bp - > b_count_desired - boff ) ;
2005-04-17 02:20:36 +04:00
ASSERT ( ( ( csize + cpoff ) < = PAGE_CACHE_SIZE ) ) ;
switch ( mode ) {
2006-01-11 07:39:08 +03:00
case XBRW_ZERO :
2005-04-17 02:20:36 +04:00
memset ( page_address ( page ) + cpoff , 0 , csize ) ;
break ;
2006-01-11 07:39:08 +03:00
case XBRW_READ :
2005-04-17 02:20:36 +04:00
memcpy ( data , page_address ( page ) + cpoff , csize ) ;
break ;
2006-01-11 07:39:08 +03:00
case XBRW_WRITE :
2005-04-17 02:20:36 +04:00
memcpy ( page_address ( page ) + cpoff , data , csize ) ;
}
boff + = csize ;
data + = csize ;
}
}
/*
2006-01-11 07:39:08 +03:00
* Handling of buffer targets ( buftargs ) .
2005-04-17 02:20:36 +04:00
*/
/*
 *	Wait for any bufs with callbacks that have been submitted but
 *	have not yet returned... walk the hash list for the target.
 *
 *	Each hash bucket is rescanned (after a delay) for as long as it
 *	still holds a buffer that is not XBF_FS_MANAGED.
 */
void
xfs_wait_buftarg(
	xfs_buftarg_t	*btp)
{
	xfs_buf_t	*bp, *next;
	xfs_bufhash_t	*bucket;
	uint		idx;
	int		rescan;

	for (idx = 0; idx < (1 << btp->bt_hashshift); idx++) {
		bucket = &btp->bt_hash[idx];
		do {
			rescan = 0;
			spin_lock(&bucket->bh_lock);
			list_for_each_entry_safe(bp, next, &bucket->bh_list,
						 b_hash_list) {
				ASSERT(btp == bp->b_target);
				if (bp->b_flags & XBF_FS_MANAGED)
					continue;

				spin_unlock(&bucket->bh_lock);
				/*
				 * Catch superblock reference count leaks
				 * immediately
				 */
				BUG_ON(bp->b_bn == 0);
				delay(100);
				rescan = 1;
				break;
			}
			/* the lock was already dropped when a rescan is due */
			if (!rescan)
				spin_unlock(&bucket->bh_lock);
		} while (rescan);
	}
}
/*
 *	Allocate buffer hash table for a given target.
 *	For devices containing metadata (i.e. not the log/realtime devices)
 *	we need to allocate a much larger hash table: 256 buckets rather
 *	than the 8 used for external devices.
 */
STATIC void
xfs_alloc_bufhash(
	xfs_buftarg_t		*btp,
	int			external)
{
	unsigned int		bucket;
	unsigned int		nbuckets;

	if (external)
		btp->bt_hashshift = 3;		/* 8 buckets */
	else
		btp->bt_hashshift = 8;		/* 256 buckets */
	btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;

	nbuckets = 1 << btp->bt_hashshift;
	btp->bt_hash = kmem_zalloc(nbuckets * sizeof(xfs_bufhash_t),
				   KM_SLEEP | KM_LARGE);

	for (bucket = 0; bucket < nbuckets; bucket++) {
		spin_lock_init(&btp->bt_hash[bucket].bh_lock);
		INIT_LIST_HEAD(&btp->bt_hash[bucket].bh_list);
	}
}
/*
 *	Free the buffer hash table of a target and clear the pointer to it.
 */
STATIC void
xfs_free_bufhash(
	xfs_buftarg_t		*btp)
{
	kmem_free(btp->bt_hash,
		  (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));

	btp->bt_hash = NULL;
}
2006-01-11 07:37:58 +03:00
/*
 * buftarg list for delwrite queue processing.
 *
 * xfs_buftarg_list links every registered buffer target through its
 * bt_list member; xfsbufd_wakeup() walks it to kick each target's
 * xfsbufd task.  xfs_buftarg_lock is taken around every insertion,
 * removal and walk of the list.
 */
2007-05-08 07:49:59 +04:00
static LIST_HEAD ( xfs_buftarg_list ) ;
2007-02-10 10:34:56 +03:00
static DEFINE_SPINLOCK ( xfs_buftarg_lock ) ;
2006-01-11 07:37:58 +03:00
/*
 *	Add a buffer target to the global buftarg list.
 */
STATIC void
xfs_register_buftarg(
	xfs_buftarg_t		*btp)
{
	spin_lock(&xfs_buftarg_lock);

	list_add(&btp->bt_list, &xfs_buftarg_list);

	spin_unlock(&xfs_buftarg_lock);
}
/*
 *	Remove a buffer target from the global buftarg list.
 */
STATIC void
xfs_unregister_buftarg(
	xfs_buftarg_t		*btp)
{
	spin_lock(&xfs_buftarg_lock);

	list_del(&btp->bt_list);

	spin_unlock(&xfs_buftarg_lock);
}
2005-04-17 02:20:36 +04:00
/*
 *	Tear down a buffer target.
 *
 *	btp:      the target to free; it must not be used afterwards.
 *	external: non-zero if the block device was opened by us and has to
 *	          be put again.
 *
 *	Delayed-write buffers are flushed (and waited for) first.  The
 *	xfsbufd thread is then stopped BEFORE the hash table and the mapping
 *	inode are released: the thread dereferences the target's mapping and
 *	issues buffer I/O, so those resources have to outlive it.
 */
void
xfs_free_buftarg(
	xfs_buftarg_t		*btp,
	int			external)
{
	xfs_flush_buftarg(btp, 1);

	/*
	 * Unregister the buftarg first so that we don't get a
	 * wakeup finding a non-existent task
	 */
	xfs_unregister_buftarg(btp);
	kthread_stop(btp->bt_task);

	/* No thread is left that could touch these any more */
	if (external)
		xfs_blkdev_put(btp->bt_bdev);
	xfs_free_bufhash(btp);
	iput(btp->bt_mapping->host);

	kmem_free(btp, sizeof(*btp));
}
/*
 *	Record the block and sector size of a buffer target and set the
 *	block size of the underlying device to the sector size.
 *
 *	Returns 0 on success or EINVAL when set_blocksize() fails.  When
 *	"verbose" is set, a warning is printed for sector sizes smaller
 *	than PAGE_CACHE_SIZE / BITS_PER_LONG.
 */
STATIC int
xfs_setsize_buftarg_flags(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize,
	int			verbose)
{
	btp->bt_bsize = blocksize;
	btp->bt_sshift = ffs(sectorsize) - 1;
	btp->bt_smask = sectorsize - 1;

	if (set_blocksize(btp->bt_bdev, sectorsize) != 0) {
		printk(KERN_WARNING
			" XFS: Cannot set_blocksize to %u on device %s \n ",
			sectorsize, XFS_BUFTARG_NAME(btp));
		return EINVAL;
	}

	if (!verbose)
		return 0;

	if ((PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
		printk(KERN_WARNING
			" XFS: %u byte sectors in use on device %s. "
			" This is suboptimal; %u or greater is ideal. \n ",
			sectorsize, XFS_BUFTARG_NAME(btp),
			(unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
	}
	return 0;
}
/*
2006-01-11 07:39:08 +03:00
* When allocating the initial buffer target we have not yet
* read in the superblock , so don ' t know what sized sectors
* are being used is at this early stage . Play safe .
*/
2005-04-17 02:20:36 +04:00
STATIC int
xfs_setsize_buftarg_early (
xfs_buftarg_t * btp ,
struct block_device * bdev )
{
return xfs_setsize_buftarg_flags ( btp ,
PAGE_CACHE_SIZE , bdev_hardsect_size ( bdev ) , 0 ) ;
}
/*
 *	Set the block and sector size of a buffer target, warning about
 *	suboptimal sector sizes.  Returns 0 on success, EINVAL on failure.
 */
int
xfs_setsize_buftarg(
	xfs_buftarg_t		*btp,
	unsigned int		blocksize,
	unsigned int		sectorsize)
{
	int			error;

	error = xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
	return error;
}
/*
 *	Give the buffer target an address space of its own.
 *
 *	A fresh inode is allocated on the block device's superblock and set
 *	up as a block special inode for "bdev"; its i_data address space is
 *	then configured and stored in btp->bt_mapping.
 *
 *	Returns 0 on success, ENOMEM when no inode could be allocated.
 */
STATIC int
xfs_mapping_buftarg(
	xfs_buftarg_t		*btp,
	struct block_device	*bdev)
{
	static const struct address_space_operations mapping_aops = {
		.sync_page = block_sync_page,
		.migratepage = fail_migrate_page,
	};
	struct backing_dev_info	*bdi;
	struct inode		*host;
	struct address_space	*as;

	host = new_inode(bdev->bd_inode->i_sb);
	if (host == NULL) {
		printk(KERN_WARNING
			" XFS: Cannot allocate mapping inode for device %s \n ",
			XFS_BUFTARG_NAME(btp));
		return ENOMEM;
	}

	host->i_mode = S_IFBLK;
	host->i_bdev = bdev;
	host->i_rdev = bdev->bd_dev;

	/* fall back to the default backing device info if bdev has none */
	bdi = blk_get_backing_dev_info(bdev);
	if (bdi == NULL)
		bdi = &default_backing_dev_info;

	as = &host->i_data;
	as->a_ops = &mapping_aops;
	as->backing_dev_info = bdi;
	mapping_set_gfp_mask(as, GFP_NOFS);

	btp->bt_mapping = as;
	return 0;
}
2006-01-11 07:37:58 +03:00
/*
 *	Set up the delayed-write queue of a buffer target and start the
 *	xfsbufd thread that services it.  The target is registered on the
 *	global buftarg list only once the thread exists.
 *
 *	Returns 0 on success, or the PTR_ERR() code of the failed
 *	kthread_run().
 */
STATIC int
xfs_alloc_delwrite_queue(
	xfs_buftarg_t		*btp)
{
	INIT_LIST_HEAD(&btp->bt_list);
	INIT_LIST_HEAD(&btp->bt_delwrite_queue);
	spinlock_init(&btp->bt_delwrite_lock, " delwri_lock ");
	btp->bt_flags = 0;

	btp->bt_task = kthread_run(xfsbufd, btp, " xfsbufd ");
	if (IS_ERR(btp->bt_task))
		return PTR_ERR(btp->bt_task);

	xfs_register_buftarg(btp);
	return 0;
}
2005-04-17 02:20:36 +04:00
xfs_buftarg_t *
xfs_alloc_buftarg (
struct block_device * bdev ,
int external )
{
xfs_buftarg_t * btp ;
btp = kmem_zalloc ( sizeof ( * btp ) , KM_SLEEP ) ;
2006-01-11 07:39:08 +03:00
btp - > bt_dev = bdev - > bd_dev ;
btp - > bt_bdev = bdev ;
2005-04-17 02:20:36 +04:00
if ( xfs_setsize_buftarg_early ( btp , bdev ) )
goto error ;
if ( xfs_mapping_buftarg ( btp , bdev ) )
goto error ;
2006-01-11 07:37:58 +03:00
if ( xfs_alloc_delwrite_queue ( btp ) )
goto error ;
2005-04-17 02:20:36 +04:00
xfs_alloc_bufhash ( btp , external ) ;
return btp ;
error :
kmem_free ( btp , sizeof ( * btp ) ) ;
return NULL ;
}
/*
2006-01-11 07:39:08 +03:00
* Delayed write buffer handling
2005-04-17 02:20:36 +04:00
*/
STATIC void
2006-01-11 07:39:08 +03:00
xfs_buf_delwri_queue (
xfs_buf_t * bp ,
2005-04-17 02:20:36 +04:00
int unlock )
{
2006-01-11 07:39:08 +03:00
struct list_head * dwq = & bp - > b_target - > bt_delwrite_queue ;
spinlock_t * dwlk = & bp - > b_target - > bt_delwrite_lock ;
2006-01-11 07:37:58 +03:00
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " delwri_q " , ( long ) unlock ) ;
ASSERT ( ( bp - > b_flags & ( XBF_DELWRI | XBF_ASYNC ) ) = = ( XBF_DELWRI | XBF_ASYNC ) ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:37:58 +03:00
spin_lock ( dwlk ) ;
2005-04-17 02:20:36 +04:00
/* If already in the queue, dequeue and place at tail */
2006-01-11 07:39:08 +03:00
if ( ! list_empty ( & bp - > b_list ) ) {
ASSERT ( bp - > b_flags & _XBF_DELWRI_Q ) ;
if ( unlock )
atomic_dec ( & bp - > b_hold ) ;
list_del ( & bp - > b_list ) ;
2005-04-17 02:20:36 +04:00
}
2006-01-11 07:39:08 +03:00
bp - > b_flags | = _XBF_DELWRI_Q ;
list_add_tail ( & bp - > b_list , dwq ) ;
bp - > b_queuetime = jiffies ;
2006-01-11 07:37:58 +03:00
spin_unlock ( dwlk ) ;
2005-04-17 02:20:36 +04:00
if ( unlock )
2006-01-11 07:39:08 +03:00
xfs_buf_unlock ( bp ) ;
2005-04-17 02:20:36 +04:00
}
void
2006-01-11 07:39:08 +03:00
xfs_buf_delwri_dequeue (
xfs_buf_t * bp )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
spinlock_t * dwlk = & bp - > b_target - > bt_delwrite_lock ;
2005-04-17 02:20:36 +04:00
int dequeued = 0 ;
2006-01-11 07:37:58 +03:00
spin_lock ( dwlk ) ;
2006-01-11 07:39:08 +03:00
if ( ( bp - > b_flags & XBF_DELWRI ) & & ! list_empty ( & bp - > b_list ) ) {
ASSERT ( bp - > b_flags & _XBF_DELWRI_Q ) ;
list_del_init ( & bp - > b_list ) ;
2005-04-17 02:20:36 +04:00
dequeued = 1 ;
}
2006-01-11 07:39:08 +03:00
bp - > b_flags & = ~ ( XBF_DELWRI | _XBF_DELWRI_Q ) ;
2006-01-11 07:37:58 +03:00
spin_unlock ( dwlk ) ;
2005-04-17 02:20:36 +04:00
if ( dequeued )
2006-01-11 07:39:08 +03:00
xfs_buf_rele ( bp ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
XB_TRACE ( bp , " delwri_dq " , ( long ) dequeued ) ;
2005-04-17 02:20:36 +04:00
}
/*
 *	Flush the given workqueue via flush_workqueue().
 */
STATIC void
xfs_buf_runall_queues(
	struct workqueue_struct	*queue)
{
	flush_workqueue(queue);
}
STATIC int
2005-06-21 09:14:01 +04:00
xfsbufd_wakeup (
2005-11-04 02:51:01 +03:00
int priority ,
gfp_t mask )
2005-04-17 02:20:36 +04:00
{
2006-01-11 12:49:57 +03:00
xfs_buftarg_t * btp ;
2006-01-11 07:37:58 +03:00
spin_lock ( & xfs_buftarg_lock ) ;
2006-01-11 12:49:57 +03:00
list_for_each_entry ( btp , & xfs_buftarg_list , bt_list ) {
2006-01-11 07:39:08 +03:00
if ( test_bit ( XBT_FORCE_SLEEP , & btp - > bt_flags ) )
2006-01-11 07:37:58 +03:00
continue ;
2006-01-11 07:39:08 +03:00
set_bit ( XBT_FORCE_FLUSH , & btp - > bt_flags ) ;
2006-01-11 07:37:58 +03:00
wake_up_process ( btp - > bt_task ) ;
}
spin_unlock ( & xfs_buftarg_lock ) ;
2005-04-17 02:20:36 +04:00
return 0 ;
}
2007-02-10 10:32:29 +03:00
/*
 * Move buffers from the target's delayed-write queue to the supplied
 * list, indicating whether we skipped any buffers to prevent deadlocks.
 *
 * "list" is (re)initialised here.  A buffer is moved only if it is not
 * pinned and its lock can be taken without sleeping; it is handed back
 * locked, with the delwri flags cleared and XBF_WRITE set.  Unless the
 * target's XBT_FORCE_FLUSH bit was set (it is cleared here), the walk
 * stops at the first lockable buffer queued for less than "age" jiffies.
 *
 * Returns the number of buffers skipped because they were pinned or
 * locked.
 */
STATIC int
xfs_buf_delwri_split (
xfs_buftarg_t * target ,
struct list_head * list ,
2007-02-10 10:34:49 +03:00
unsigned long age )
2007-02-10 10:32:29 +03:00
{
xfs_buf_t * bp , * n ;
struct list_head * dwq = & target - > bt_delwrite_queue ;
spinlock_t * dwlk = & target - > bt_delwrite_lock ;
int skipped = 0 ;
2007-02-10 10:34:49 +03:00
int force ;
2007-02-10 10:32:29 +03:00
2007-02-10 10:34:49 +03:00
/* Consume a pending force-flush request for this target */
force = test_and_clear_bit ( XBT_FORCE_FLUSH , & target - > bt_flags ) ;
2007-02-10 10:32:29 +03:00
INIT_LIST_HEAD ( list ) ;
spin_lock ( dwlk ) ;
list_for_each_entry_safe ( bp , n , dwq , b_list ) {
XB_TRACE ( bp , " walkq1 " , ( long ) xfs_buf_ispin ( bp ) ) ;
ASSERT ( bp - > b_flags & XBF_DELWRI ) ;
/* xfs_buf_cond_lock() returns 0 when the lock was obtained */
if ( ! xfs_buf_ispin ( bp ) & & ! xfs_buf_cond_lock ( bp ) ) {
2007-02-10 10:34:49 +03:00
if ( ! force & &
2007-02-10 10:32:29 +03:00
time_before ( jiffies , bp - > b_queuetime + age ) ) {
/* Not old enough yet: give the lock back and stop the walk */
xfs_buf_unlock ( bp ) ;
break ;
}
/* Turn the delayed write into a plain write and hand it over */
bp - > b_flags & = ~ ( XBF_DELWRI | _XBF_DELWRI_Q |
_XBF_RUN_QUEUES ) ;
bp - > b_flags | = XBF_WRITE ;
list_move_tail ( & bp - > b_list , list ) ;
} else
skipped + + ;
}
spin_unlock ( dwlk ) ;
return skipped ;
}
2005-04-17 02:20:36 +04:00
/*
 * Per-target kernel thread started by xfs_alloc_delwrite_queue().
 *
 * "data" is the xfs_buftarg_t the thread works for.  Until
 * kthread_should_stop() becomes true the thread repeatedly sleeps for
 * xfs_buf_timer_centisecs, pulls the buffers that are due off the
 * target's delayed-write queue and issues them through
 * xfs_buf_iostrategy().  Returns 0 when asked to stop.
 */
STATIC int
2005-06-21 09:14:01 +04:00
xfsbufd (
2007-02-10 10:32:29 +03:00
void * data )
2005-04-17 02:20:36 +04:00
{
2007-02-10 10:32:29 +03:00
struct list_head tmp ;
xfs_buftarg_t * target = ( xfs_buftarg_t * ) data ;
int count ;
xfs_buf_t * bp ;
2005-04-17 02:20:36 +04:00
current - > flags | = PF_MEMALLOC ;
do {
2005-06-25 10:13:50 +04:00
/*
 * While freezing, mark the target XBT_FORCE_SLEEP so that
 * xfsbufd_wakeup() leaves this thread alone.
 */
if ( unlikely ( freezing ( current ) ) ) {
2006-01-11 07:39:08 +03:00
set_bit ( XBT_FORCE_SLEEP , & target - > bt_flags ) ;
2005-06-25 10:13:50 +04:00
refrigerator ( ) ;
2005-05-06 00:30:13 +04:00
} else {
2006-01-11 07:39:08 +03:00
clear_bit ( XBT_FORCE_SLEEP , & target - > bt_flags ) ;
2005-05-06 00:30:13 +04:00
}
2005-04-17 02:20:36 +04:00
2005-11-04 02:51:01 +03:00
/* msecs_to_jiffies(10) is one centisecond expressed in jiffies */
schedule_timeout_interruptible (
xfs_buf_timer_centisecs * msecs_to_jiffies ( 10 ) ) ;
2005-04-17 02:20:36 +04:00
2007-02-10 10:32:29 +03:00
/* Collect the buffers that are old enough (or all, if forced) in tmp */
xfs_buf_delwri_split ( target , & tmp ,
2007-02-10 10:34:49 +03:00
xfs_buf_age_centisecs * msecs_to_jiffies ( 10 ) ) ;
2005-04-17 02:20:36 +04:00
2007-02-10 10:32:29 +03:00
count = 0 ;
2005-04-17 02:20:36 +04:00
while ( ! list_empty ( & tmp ) ) {
2006-01-11 07:39:08 +03:00
bp = list_entry ( tmp . next , xfs_buf_t , b_list ) ;
ASSERT ( target = = bp - > b_target ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
/* Unlink before issuing; bp is not touched again afterwards */
list_del_init ( & bp - > b_list ) ;
xfs_buf_iostrategy ( bp ) ;
2007-02-10 10:32:29 +03:00
count + + ;
2005-04-17 02:20:36 +04:00
}
if ( as_list_len > 0 )
purge_addresses ( ) ;
2006-09-28 04:52:15 +04:00
/* Only run the target's address space if something was issued */
if ( count )
blk_run_address_space ( target - > bt_mapping ) ;
2005-04-17 02:20:36 +04:00
2005-09-05 02:34:18 +04:00
} while ( ! kthread_should_stop ( ) ) ;
2005-04-17 02:20:36 +04:00
2005-09-05 02:34:18 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
/*
 * Write out the delayed-write buffers of the given target.
 *
 * Both XFS workqueues are flushed first.  Then every delayed-write
 * buffer that is neither pinned nor locked is pulled off the queue
 * (regardless of age) and issued through xfs_buf_iostrategy().  When
 * "wait" is non-zero the buffers are issued synchronously and each one
 * is waited for and released before returning.
 *
 * Per the original note, this is used in filesystem error handling to
 * preserve the consistency of its metadata.
 *
 * Returns the number of buffers xfs_buf_delwri_split() had to skip.
 */
int
xfs_flush_buftarg (
2007-02-10 10:32:29 +03:00
xfs_buftarg_t * target ,
int wait )
2005-04-17 02:20:36 +04:00
{
2007-02-10 10:32:29 +03:00
struct list_head tmp ;
xfs_buf_t * bp , * n ;
int pincount = 0 ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
xfs_buf_runall_queues ( xfsdatad_workqueue ) ;
xfs_buf_runall_queues ( xfslogd_workqueue ) ;
2005-04-17 02:20:36 +04:00
2007-02-10 10:34:49 +03:00
/* Force the split to ignore buffer age */
set_bit ( XBT_FORCE_FLUSH , & target - > bt_flags ) ;
pincount = xfs_buf_delwri_split ( target , & tmp , 0 ) ;
2005-04-17 02:20:36 +04:00
/*
* Dropped the delayed write list lock , now walk the temporary list
*/
2006-01-11 07:39:08 +03:00
list_for_each_entry_safe ( bp , n , & tmp , b_list ) {
2007-02-10 10:32:29 +03:00
ASSERT ( target = = bp - > b_target ) ;
2005-04-17 02:20:36 +04:00
/*
 * Waiting: make the write synchronous and keep the buffer on tmp
 * for the loop below.  Not waiting: unlink it now.
 */
if ( wait )
2006-01-11 07:39:08 +03:00
bp - > b_flags & = ~ XBF_ASYNC ;
2005-04-17 02:20:36 +04:00
else
2006-01-11 07:39:08 +03:00
list_del_init ( & bp - > b_list ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
xfs_buf_iostrategy ( bp ) ;
2005-04-17 02:20:36 +04:00
}
2006-09-28 04:52:15 +04:00
if ( wait )
blk_run_address_space ( target - > bt_mapping ) ;
2005-04-17 02:20:36 +04:00
/*
* Remaining list items must be flushed before returning
*/
while ( ! list_empty ( & tmp ) ) {
2006-01-11 07:39:08 +03:00
bp = list_entry ( tmp . next , xfs_buf_t , b_list ) ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
list_del_init ( & bp - > b_list ) ;
xfs_iowait ( bp ) ;
xfs_buf_relse ( bp ) ;
2005-04-17 02:20:36 +04:00
}
return pincount ;
}
2005-11-02 02:15:05 +03:00
int __init
2006-01-11 07:39:08 +03:00
xfs_buf_init ( void )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
# ifdef XFS_BUF_TRACE
xfs_buf_trace_buf = ktrace_alloc ( XFS_BUF_TRACE_SIZE , KM_SLEEP ) ;
2005-11-02 02:15:05 +03:00
# endif
2006-03-14 05:18:19 +03:00
xfs_buf_zone = kmem_zone_init_flags ( sizeof ( xfs_buf_t ) , " xfs_buf " ,
KM_ZONE_HWALIGN , NULL ) ;
2006-01-11 07:39:08 +03:00
if ( ! xfs_buf_zone )
2005-11-02 02:15:05 +03:00
goto out_free_trace_buf ;
2007-03-22 11:11:27 +03:00
xfslogd_workqueue = create_workqueue ( " xfslogd " ) ;
2005-06-21 09:14:01 +04:00
if ( ! xfslogd_workqueue )
2005-11-02 02:15:05 +03:00
goto out_free_buf_zone ;
2005-04-17 02:20:36 +04:00
2007-03-22 11:11:27 +03:00
xfsdatad_workqueue = create_workqueue ( " xfsdatad " ) ;
2005-06-21 09:14:01 +04:00
if ( ! xfsdatad_workqueue )
goto out_destroy_xfslogd_workqueue ;
2005-04-17 02:20:36 +04:00
2006-01-11 07:39:08 +03:00
xfs_buf_shake = kmem_shake_register ( xfsbufd_wakeup ) ;
if ( ! xfs_buf_shake )
2006-01-11 07:37:58 +03:00
goto out_destroy_xfsdatad_workqueue ;
2005-11-02 02:15:05 +03:00
2005-06-21 09:14:01 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
2005-06-21 09:14:01 +04:00
out_destroy_xfsdatad_workqueue :
destroy_workqueue ( xfsdatad_workqueue ) ;
out_destroy_xfslogd_workqueue :
destroy_workqueue ( xfslogd_workqueue ) ;
out_free_buf_zone :
2006-01-11 07:39:08 +03:00
kmem_zone_destroy ( xfs_buf_zone ) ;
2005-11-02 02:15:05 +03:00
out_free_trace_buf :
2006-01-11 07:39:08 +03:00
# ifdef XFS_BUF_TRACE
ktrace_free ( xfs_buf_trace_buf ) ;
2005-06-21 09:14:01 +04:00
# endif
2006-03-14 05:18:19 +03:00
return - ENOMEM ;
2005-04-17 02:20:36 +04:00
}
void
2006-01-11 07:39:08 +03:00
xfs_buf_terminate ( void )
2005-04-17 02:20:36 +04:00
{
2006-01-11 07:39:08 +03:00
kmem_shake_deregister ( xfs_buf_shake ) ;
2005-11-02 02:15:05 +03:00
destroy_workqueue ( xfsdatad_workqueue ) ;
destroy_workqueue ( xfslogd_workqueue ) ;
2006-01-11 07:39:08 +03:00
kmem_zone_destroy ( xfs_buf_zone ) ;
# ifdef XFS_BUF_TRACE
ktrace_free ( xfs_buf_trace_buf ) ;
2005-04-17 02:20:36 +04:00
# endif
}
2007-05-08 07:49:59 +04:00
#ifdef CONFIG_KDB_MODULES
/*
 * Accessor built only when CONFIG_KDB_MODULES is defined: hands back the
 * address of this file's xfs_buftarg_list head.
 */
struct list_head *
xfs_get_buftarg_list(void)
{
	struct list_head	*head = &xfs_buftarg_list;

	return head;
}
#endif