/*
 * Copyright (C) International Business Machines Corp., 2000-2005
 * Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 * This program is free software;  you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY;  without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program;  if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
#define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag)

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit_unlock(META_locked, &mp->flag);
	wake_up(&mp->wait);
}

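/*
 * Sleep until the metapage lock is released.  The page lock is dropped
 * while waiting so that I/O on the page can complete, and retaken before
 * retrying.
 */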
static inline void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);

	INCREMENT(mpStat.lockwait);
	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			unlock_page(mp->page);
			io_schedule();
			lock_page(mp->page);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/*
 * Must have mp->page locked
 */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

#define METAPOOL_MIN_PAGES 32

static struct kmem_cache *metapage_cache;
static mempool_t *metapage_mempool;

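/*
 * When the page size is larger than the metapage size (PSIZE), several
 * metapages share one page.  A meta_anchor hung off page_private() tracks
 * the metapages in the page and a count of in-flight I/Os.
 */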
#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)

#if MPS_PER_PAGE > 1
struct meta_anchor {
	int mp_count;
	atomic_t io_count;
	struct metapage *mp[MPS_PER_PAGE];
};

#define mp_anchor(page) ((struct meta_anchor *)page_private(page))

static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	if (!PagePrivate(page))
		return NULL;
	return mp_anchor(page)->mp[offset >> L2PSIZE];
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a;
	int index;
	int l2mp_blocks;	/* log2 blocks per metapage */

	if (PagePrivate(page))
		a = mp_anchor(page);
	else {
		a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
		if (!a)
			return -ENOMEM;
		set_page_private(page, (unsigned long)a);
		SetPagePrivate(page);
		kmap(page);
	}

	if (mp) {
		l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
		index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
		a->mp_count++;
		a->mp[index] = mp;
	}

	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a = mp_anchor(page);
	int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
	int index;

	index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
	BUG_ON(a->mp[index] != mp);

	a->mp[index] = NULL;
	if (--a->mp_count == 0) {
		kfree(a);
		set_page_private(page, 0);
		ClearPagePrivate(page);
		kunmap(page);
	}
}

static inline void inc_io(struct page *page)
{
	atomic_inc(&mp_anchor(page)->io_count);
}

static inline void dec_io(struct page *page, void (*handler) (struct page *))
{
	if (atomic_dec_and_test(&mp_anchor(page)->io_count))
		handler(page);
}

#else
static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	if (mp) {
		set_page_private(page, (unsigned long)mp);
		SetPagePrivate(page);
		kmap(page);
	}
	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	set_page_private(page, 0);
	ClearPagePrivate(page);
	kunmap(page);
}

#define inc_io(page) do {} while(0)
#define dec_io(page, handler) handler(page)
#endif

static void init_once(struct kmem_cache *cachep, void *foo)
{
	struct metapage *mp = (struct metapage *)foo;

	mp->lid = 0;
	mp->lsn = 0;
	mp->flag = 0;
	mp->data = NULL;
	mp->clsn = 0;
	mp->log = NULL;
	set_bit(META_free, &mp->flag);
	init_waitqueue_head(&mp->wait);
}

static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
{
	return mempool_alloc(metapage_mempool, gfp_mask);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}

int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES,
						    metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

static inline void drop_metapage(struct page *page, struct metapage *mp)
{
	if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
	    test_bit(META_io, &mp->flag))
		return;
	remove_metapage(page, mp);
	INCREMENT(mpStat.pagefree);
	free_metapage(mp);
}

/*
 * Metapage address space operations
 */

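/*
 * Map a logical block of the inode to the physical block backing it,
 * clamping *len to the contiguous extent found.  Blocks past the end of
 * the file map to 0.
 */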
static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
				    int *len)
{
	int rc = 0;
	int xflag;
	s64 xaddr;
	sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
			       inode->i_blkbits;

	if (lblock >= file_blocks)
		return 0;
	if (lblock + *len > file_blocks)
		*len = file_blocks - lblock;

	if (inode->i_ino) {
		rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
		if ((rc == 0) && *len)
			lblock = (sector_t)xaddr;
		else
			lblock = 0;
	} /* else no mapping */

	return lblock;
}

static void last_read_complete(struct page *page)
{
	if (!PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
}

static void metapage_read_end_io(struct bio *bio, int err)
{
	struct page *page = bio->bi_private;

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
		SetPageError(page);
	}

	dec_io(page, last_read_complete);
	bio_put(bio);
}

static void remove_from_logsync(struct metapage *mp)
{
	struct jfs_log *log = mp->log;
	unsigned long flags;
	/*
	 * This can race.  Recheck that log hasn't been set to null, and after
	 * acquiring logsync lock, recheck lsn
	 */
	if (!log)
		return;

	LOGSYNC_LOCK(log, flags);
	if (mp->lsn) {
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
	}
	LOGSYNC_UNLOCK(log, flags);
}

static void last_write_complete(struct page *page)
{
	struct metapage *mp;
	unsigned int offset;

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);
		if (mp && test_bit(META_io, &mp->flag)) {
			if (mp->lsn)
				remove_from_logsync(mp);
			clear_bit(META_io, &mp->flag);
		}
		/*
		 * I'd like to call drop_metapage here, but I don't think it's
		 * safe unless I have the page locked
		 */
	}
	end_page_writeback(page);
}

static void metapage_write_end_io(struct bio *bio, int err)
{
	struct page *page = bio->bi_private;

	BUG_ON(!PagePrivate(page));

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
		SetPageError(page);
	}
	dec_io(page, last_write_complete);
	bio_put(bio);
}

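/*
 * Write out every dirty metapage in the page, merging metapages that are
 * contiguous both in memory and on disk into a single bio.  Metapages
 * held by the journal (nohomeok) are skipped and the page is redirtied.
 */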
static int metapage_writepage(struct page *page, struct writeback_control *wbc)
{
	struct bio *bio = NULL;
	int block_offset;	/* block offset of mp within page */
	struct inode *inode = page->mapping->host;
	int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
	int len;
	int xlen;
	struct metapage *mp;
	int redirty = 0;
	sector_t lblock;
	int nr_underway = 0;
	sector_t pblock;
	sector_t next_block = 0;
	sector_t page_start;
	unsigned long bio_bytes = 0;
	unsigned long bio_offset = 0;
	int offset;

	page_start = (sector_t)page->index <<
		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp || !test_bit(META_dirty, &mp->flag))
			continue;

		if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
			redirty = 1;
			/*
			 * Make sure this page isn't blocked indefinitely.
			 * If the journal isn't undergoing I/O, push it
			 */
			if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
				jfs_flush_journal(mp->log, 0);
			continue;
		}

		clear_bit(META_dirty, &mp->flag);
		block_offset = offset >> inode->i_blkbits;
		lblock = page_start + block_offset;
		if (bio) {
			if (xlen && lblock == next_block) {
				/* Contiguous, in memory & on disk */
				len = min(xlen, blocks_per_mp);
				xlen -= len;
				bio_bytes += len << inode->i_blkbits;
				set_bit(META_io, &mp->flag);
				continue;
			}
			/* Not contiguous */
			if (bio_add_page(bio, page, bio_bytes, bio_offset) <
			    bio_bytes)
				goto add_failed;
			/*
			 * Increment counter before submitting i/o to keep
			 * count from hitting zero before we're through
			 */
			inc_io(page);
			if (!bio->bi_size)
				goto dump_bio;
			submit_bio(WRITE, bio);
			nr_underway++;
			bio = NULL;
		} else
			inc_io(page);
		xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
		pblock = metapage_get_blocks(inode, lblock, &xlen);
		if (!pblock) {
			/* Need better error handling */
			printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
			dec_io(page, last_write_complete);
			continue;
		}
		set_bit(META_io, &mp->flag);
		len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);

		bio = bio_alloc(GFP_NOFS, 1);
		bio->bi_bdev = inode->i_sb->s_bdev;
		bio->bi_sector = pblock << (inode->i_blkbits - 9);
		bio->bi_end_io = metapage_write_end_io;
		bio->bi_private = page;

		/* Don't call bio_add_page yet, we may add to this vec */
		bio_offset = offset;
		bio_bytes = len << inode->i_blkbits;

		xlen -= len;
		next_block = lblock + len;
	}
	if (bio) {
		if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
			goto add_failed;
		if (!bio->bi_size)
			goto dump_bio;

		submit_bio(WRITE, bio);
		nr_underway++;
	}
	if (redirty)
		redirty_page_for_writepage(wbc, page);

	unlock_page(page);

	if (nr_underway == 0)
		end_page_writeback(page);

	return 0;
add_failed:
	/* We should never reach here, since we're only adding one vec */
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	goto skip;
dump_bio:
	print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
		       4, bio, sizeof(*bio), 0);
skip:
	bio_put(bio);
	unlock_page(page);
	dec_io(page, last_write_complete);

	return -EIO;
}

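/*
 * Read the blocks backing the page, issuing one bio per contiguous
 * on-disk extent; unmapped blocks are simply skipped.
 */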
static int metapage_readpage(struct file *fp, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct bio *bio = NULL;
	int block_offset;
	int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
	sector_t page_start;	/* address of page in fs blocks */
	sector_t pblock;
	int xlen;
	unsigned int len;
	int offset;

	BUG_ON(!PageLocked(page));
	page_start = (sector_t)page->index <<
		     (PAGE_CACHE_SHIFT - inode->i_blkbits);

	block_offset = 0;
	while (block_offset < blocks_per_page) {
		xlen = blocks_per_page - block_offset;
		pblock = metapage_get_blocks(inode, page_start + block_offset,
					     &xlen);
		if (pblock) {
			if (!PagePrivate(page))
				insert_metapage(page, NULL);
			inc_io(page);
			if (bio)
				submit_bio(READ, bio);

			bio = bio_alloc(GFP_NOFS, 1);
			bio->bi_bdev = inode->i_sb->s_bdev;
			bio->bi_sector = pblock << (inode->i_blkbits - 9);
			bio->bi_end_io = metapage_read_end_io;
			bio->bi_private = page;
			len = xlen << inode->i_blkbits;
			offset = block_offset << inode->i_blkbits;
			if (bio_add_page(bio, page, len, offset) < len)
				goto add_failed;
			block_offset += xlen;
		} else
			block_offset++;
	}
	if (bio)
		submit_bio(READ, bio);
	else
		unlock_page(page);

	return 0;

add_failed:
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	bio_put(bio);
	dec_io(page, last_read_complete);
	return -EIO;
}

static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct metapage *mp;
	int ret = 1;
	int offset;

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp)
			continue;

		jfs_info("metapage_releasepage: mp = 0x%p", mp);
		if (mp->count || mp->nohomeok ||
		    test_bit(META_dirty, &mp->flag)) {
			jfs_info("count = %ld, nohomeok = %d", mp->count,
				 mp->nohomeok);
			ret = 0;
			continue;
		}
		if (mp->lsn)
			remove_from_logsync(mp);
		remove_metapage(page, mp);
		INCREMENT(mpStat.pagefree);
		free_metapage(mp);
	}
	return ret;
}

static void metapage_invalidatepage(struct page *page, unsigned long offset)
{
	BUG_ON(offset);

	BUG_ON(PageWriteback(page));

	metapage_releasepage(page, 0);
}

const struct address_space_operations jfs_metapage_aops = {
	.readpage	= metapage_readpage,
	.writepage	= metapage_writepage,
	.sync_page	= block_sync_page,
	.releasepage	= metapage_releasepage,
	.invalidatepage	= metapage_invalidatepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,
};

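/*
 * Find or create the metapage covering lblock of the given inode.  The
 * metapage is returned with its reference count incremented and the
 * metapage lock held; release_metapage() undoes both.
 */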
struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp = NULL;
	struct page *page;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
		 inode->i_ino, lblock, absolute);
	l2bsize = inode->i_blkbits;
	l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	page_index = lblock >> l2BlocksPerPage;
	page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
	if ((page_offset + size) > PAGE_CACHE_SIZE) {
		jfs_err("MetaData crosses page boundary!!");
		jfs_err("lblock = %lx, size = %d", lblock, size);
		dump_stack();
		return NULL;
	}
	if (absolute)
		mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	if (new && (PSIZE == PAGE_CACHE_SIZE)) {
		page = grab_cache_page(mapping, page_index);
		if (!page) {
			jfs_err("grab_cache_page failed!");
			return NULL;
		}
		SetPageUptodate(page);
	} else {
		page = read_mapping_page(mapping, page_index, NULL);
		if (IS_ERR(page) || !PageUptodate(page)) {
			jfs_err("read_mapping_page failed!");
			return NULL;
		}
		lock_page(page);
	}

	mp = page_to_mp(page, page_offset);
	if (mp) {
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			jfs_err("logical_size = %d, size = %d",
				mp->logical_size, size);
			dump_stack();
			goto unlock;
		}
		mp->count++;
		lock_metapage(mp);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				discard_metapage(mp);
				goto unlock;
			}
			clear_bit(META_discard, &mp->flag);
		}
	} else {
		INCREMENT(mpStat.pagealloc);
		mp = alloc_metapage(GFP_NOFS);
		mp->page = page;
		mp->flag = 0;
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		mp->nohomeok = 0;
		mp->logical_size = size;
		mp->data = page_address(page) + page_offset;
		mp->index = lblock;
		if (unlikely(insert_metapage(page, mp))) {
			free_metapage(mp);
			goto unlock;
		}
		lock_metapage(mp);
	}

	if (new) {
		jfs_info("zeroing mp = 0x%p", mp);
		memset(mp->data, 0, PSIZE);
	}

	unlock_page(page);
	jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
	return mp;

unlock:
	unlock_page(page);
	return NULL;
}

void grab_metapage(struct metapage *mp)
{
	jfs_info("grab_metapage: mp = 0x%p", mp);
	page_cache_get(mp->page);
	lock_page(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
}

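/*
 * Force the metapage to disk immediately.  META_forcewrite overrides the
 * nohomeok check in metapage_writepage, so even metapages held by the
 * journal are written.
 */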
void force_metapage(struct metapage *mp)
{
	struct page *page = mp->page;

	jfs_info("force_metapage: mp = 0x%p", mp);
	set_bit(META_forcewrite, &mp->flag);
	clear_bit(META_sync, &mp->flag);
	page_cache_get(page);
	lock_page(page);
	set_page_dirty(page);
	write_one_page(page, 1);
	clear_bit(META_forcewrite, &mp->flag);
	page_cache_release(page);
}

void hold_metapage(struct metapage *mp)
{
	lock_page(mp->page);
}

void put_metapage(struct metapage *mp)
{
	if (mp->count || mp->nohomeok) {
		/* Someone else will release this */
		unlock_page(mp->page);
		return;
	}
	page_cache_get(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
	release_metapage(mp);
}

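/*
 * Drop a reference to a metapage.  When the last reference is dropped,
 * a dirty metapage is handed to the page cache for writeback (written
 * synchronously if META_sync is set); otherwise it is removed from the
 * log sync list and may be freed.
 */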
void release_metapage(struct metapage *mp)
{
	struct page *page = mp->page;
	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	BUG_ON(!page);

	lock_page(page);
	unlock_metapage(mp);

	assert(mp->count);
	if (--mp->count || mp->nohomeok) {
		unlock_page(page);
		page_cache_release(page);
		return;
	}

	if (test_bit(META_dirty, &mp->flag)) {
		set_page_dirty(page);
		if (test_bit(META_sync, &mp->flag)) {
			clear_bit(META_sync, &mp->flag);
			write_one_page(page, 1);
			lock_page(page); /* write_one_page unlocks the page */
		}
	} else if (mp->lsn)	/* discard_metapage doesn't remove it */
		remove_from_logsync(mp);

	/* Try to keep metapages from using up too much memory */
	drop_metapage(page, mp);

	unlock_page(page);
	page_cache_release(page);
}

void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	sector_t lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	int BlocksPerPage = 1 << l2BlocksPerPage;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping =
		JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
	struct metapage *mp;
	struct page *page;
	unsigned int offset;

	/*
	 * Mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
	     lblock += BlocksPerPage) {
		page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
		if (!page)
			continue;
		for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
			mp = page_to_mp(page, offset);
			if (!mp)
				continue;
			if (mp->index < addr)
				continue;
			if (mp->index >= addr + len)
				break;

			clear_bit(META_dirty, &mp->flag);
			set_bit(META_discard, &mp->flag);
			if (mp->lsn)
				remove_from_logsync(mp);
		}
		unlock_page(page);
		page_cache_release(page);
	}
}

#ifdef CONFIG_JFS_STATISTICS
int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
		    int *eof, void *data)
{
	int len = 0;
	off_t begin;

	len += sprintf(buffer,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);

	begin = offset;
	*start = buffer + begin;
	len -= begin;

	if (len > length)
		len = length;
	else
		*eof = 1;

	if (len < 0)
		len = 0;

	return len;
}
#endif