2019-05-19 13:08:55 +01:00
// SPDX-License-Identifier: GPL-2.0-only
2005-06-21 17:17:14 -07:00
/*
 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
 *
 * bitmap_create  - sets up the bitmap structure
 * bitmap_destroy - destroys the bitmap structure
 *
 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
 * - added disk storage for bitmap
 * - changes to allow various bitmap chunk sizes
 */
/*
 * Still to do:
 *
 * flush after a percentage of bits is set rather than just time based
 * (maybe both).
 */
2009-03-31 14:33:13 +11:00
# include <linux/blkdev.h>
2005-06-21 17:17:14 -07:00
# include <linux/module.h>
# include <linux/errno.h>
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/timer.h>
# include <linux/sched.h>
# include <linux/list.h>
# include <linux/file.h>
# include <linux/mount.h>
# include <linux/buffer_head.h>
2012-03-19 12:46:40 +11:00
# include <linux/seq_file.h>
2016-11-14 16:30:21 +11:00
# include <trace/events/block.h>
2009-03-31 14:33:13 +11:00
# include "md.h"
2017-10-10 17:02:41 -04:00
# include "md-bitmap.h"
2005-06-21 17:17:14 -07:00
2010-06-01 19:37:31 +10:00
static inline char * bmname ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
return bitmap - > mddev ? mdname ( bitmap - > mddev ) : " mdX " ;
}
/*
* check a page and , if necessary , allocate it ( or hijack it if the alloc fails )
*
* 1 ) check to see if this page is allocated , if it ' s not then try to alloc
* 2 ) if the alloc fails , set the page ' s hijacked flag so we ' ll use the
* page pointer directly as a counter
*
* if we find our page , we increment the page ' s refcount so that it stays
* allocated while we ' re using it
*/
2018-08-01 15:20:50 -07:00
static int md_bitmap_checkpage ( struct bitmap_counts * bitmap ,
unsigned long page , int create , int no_hijack )
2009-09-23 18:06:44 +10:00
__releases ( bitmap - > lock )
__acquires ( bitmap - > lock )
2005-06-21 17:17:14 -07:00
{
unsigned char * mappage ;
2023-05-15 21:48:05 +08:00
WARN_ON_ONCE ( page > = bitmap - > pages ) ;
2005-06-21 17:17:14 -07:00
if ( bitmap - > bp [ page ] . hijacked ) /* it's hijacked, don't try to alloc */
return 0 ;
if ( bitmap - > bp [ page ] . map ) /* page is already allocated, just return */
return 0 ;
if ( ! create )
return - ENOENT ;
/* this page has not been allocated yet */
2010-06-01 19:37:31 +10:00
spin_unlock_irq ( & bitmap - > lock ) ;
2015-02-02 17:08:03 +11:00
/* It is possible that this is being called inside a
* prepare_to_wait / finish_wait loop from raid5c : make_request ( ) .
* In general it is not permitted to sleep in that context as it
* can cause the loop to spin freely .
* That doesn ' t apply here as we can only reach this point
* once with any loop .
* When this function completes , either bp [ page ] . map or
* bp [ page ] . hijacked . In either case , this function will
* abort before getting to this point again . So there is
* no risk of a free - spin , and so it is safe to assert
* that sleeping here is allowed .
*/
sched_annotate_sleep ( ) ;
2012-03-19 12:46:41 +11:00
mappage = kzalloc ( PAGE_SIZE , GFP_NOIO ) ;
2010-06-01 19:37:31 +10:00
spin_lock_irq ( & bitmap - > lock ) ;
if ( mappage = = NULL ) {
2012-05-22 13:55:24 +10:00
pr_debug ( " md/bitmap: map page allocation failed, hijacking \n " ) ;
2016-05-02 11:50:11 -04:00
/* We don't support hijack for cluster raid */
if ( no_hijack )
return - ENOMEM ;
2005-06-21 17:17:14 -07:00
/* failed - set the hijacked flag so that we can use the
* pointer as a counter */
if ( ! bitmap - > bp [ page ] . map )
bitmap - > bp [ page ] . hijacked = 1 ;
2010-06-01 19:37:31 +10:00
} else if ( bitmap - > bp [ page ] . map | |
bitmap - > bp [ page ] . hijacked ) {
2005-06-21 17:17:14 -07:00
/* somebody beat us to getting the page */
2012-03-19 12:46:41 +11:00
kfree ( mappage ) ;
2010-06-01 19:37:31 +10:00
} else {
2005-06-21 17:17:14 -07:00
2010-06-01 19:37:31 +10:00
/* no page was in place and we have one, so install it */
2005-06-21 17:17:14 -07:00
2010-06-01 19:37:31 +10:00
bitmap - > bp [ page ] . map = mappage ;
bitmap - > missing_pages - - ;
}
2005-06-21 17:17:14 -07:00
return 0 ;
}
/* if page is completely empty, put it back on the free list, or dealloc it */
/* if page was hijacked, unmark the flag so it might get alloced next time */
/* Note: lock should be held when calling this */
2018-08-01 15:20:50 -07:00
static void md_bitmap_checkfree ( struct bitmap_counts * bitmap , unsigned long page )
2005-06-21 17:17:14 -07:00
{
char * ptr ;
if ( bitmap - > bp [ page ] . count ) /* page is still busy */
return ;
/* page is no longer in use, it can be released */
if ( bitmap - > bp [ page ] . hijacked ) { /* page was hijacked, undo this now */
bitmap - > bp [ page ] . hijacked = 0 ;
bitmap - > bp [ page ] . map = NULL ;
2010-06-01 19:37:31 +10:00
} else {
/* normal case, free the page */
ptr = bitmap - > bp [ page ] . map ;
bitmap - > bp [ page ] . map = NULL ;
bitmap - > missing_pages + + ;
2012-03-19 12:46:41 +11:00
kfree ( ptr ) ;
2005-06-21 17:17:14 -07:00
}
}
/*
* bitmap file handling - read and write the bitmap file and its superblock
*/
/*
* basic page I / O operations
*/
2005-06-21 17:17:27 -07:00
/* IO operations when bitmap is stored near all superblocks */
2023-06-15 08:48:36 +02:00
/* choose a good rdev and read the page from there */
2012-05-22 13:55:08 +10:00
static int read_sb_page ( struct mddev * mddev , loff_t offset ,
2023-06-15 08:48:36 +02:00
struct page * page , unsigned long index , int size )
2005-06-21 17:17:27 -07:00
{
2023-06-15 08:48:37 +02:00
sector_t sector = mddev - > bitmap_info . offset + offset +
index * ( PAGE_SIZE / SECTOR_SIZE ) ;
2011-10-11 16:45:26 +11:00
struct md_rdev * rdev ;
2005-06-21 17:17:27 -07:00
2012-03-19 12:46:39 +11:00
rdev_for_each ( rdev , mddev ) {
2023-06-15 08:48:36 +02:00
u32 iosize = roundup ( size , bdev_logical_block_size ( rdev - > bdev ) ) ;
2005-09-09 16:23:52 -07:00
2023-06-15 08:48:36 +02:00
if ( ! test_bit ( In_sync , & rdev - > flags ) | |
test_bit ( Faulty , & rdev - > flags ) | |
test_bit ( Bitmap_sync , & rdev - > flags ) )
continue ;
2005-06-21 17:17:27 -07:00
2023-06-15 08:48:38 +02:00
if ( sync_page_io ( rdev , sector , iosize , page , REQ_OP_READ , true ) )
2012-05-22 13:55:08 +10:00
return 0 ;
2005-09-09 16:23:52 -07:00
}
2012-05-22 13:55:08 +10:00
return - EIO ;
2005-06-21 17:17:27 -07:00
}
2011-10-11 16:47:53 +11:00
static struct md_rdev * next_active_rdev ( struct md_rdev * rdev , struct mddev * mddev )
2008-09-01 12:48:13 +10:00
{
/* Iterate the disks of an mddev, using rcu to protect access to the
* linked list , and raising the refcount of devices we return to ensure
* they don ' t disappear while in use .
* As devices are only added or removed when raid_disk is < 0 and
* nr_pending is 0 and In_sync is clear , the entries we return will
* still be in the same position on the list when we re - enter
2012-10-11 13:43:21 +11:00
* list_for_each_entry_continue_rcu .
2015-05-20 15:05:09 +10:00
*
* Note that if entered with ' rdev = = NULL ' to start at the
* beginning , we temporarily assign ' rdev ' to an address which
* isn ' t really an rdev , but which can be used by
* list_for_each_entry_continue_rcu ( ) to find the first entry .
2008-09-01 12:48:13 +10:00
*/
rcu_read_lock ( ) ;
if ( rdev = = NULL )
/* start at the beginning */
2015-05-20 15:05:09 +10:00
rdev = list_entry ( & mddev - > disks , struct md_rdev , same_set ) ;
2008-09-01 12:48:13 +10:00
else {
/* release the previous rdev and start from there. */
rdev_dec_pending ( rdev , mddev ) ;
}
2012-10-11 13:43:21 +11:00
list_for_each_entry_continue_rcu ( rdev , & mddev - > disks , same_set ) {
2008-09-01 12:48:13 +10:00
if ( rdev - > raid_disk > = 0 & &
! test_bit ( Faulty , & rdev - > flags ) ) {
/* this is a usable devices */
atomic_inc ( & rdev - > nr_pending ) ;
rcu_read_unlock ( ) ;
return rdev ;
}
}
rcu_read_unlock ( ) ;
return NULL ;
}
2023-02-24 11:33:23 -07:00
/*
 * Pick the write size for the last bitmap page: @last_page_size rounded up to
 * the device's optimal I/O size when that exceeds its logical block size,
 * otherwise the caller's already-rounded @io_size.
 */
static unsigned int optimal_io_size(struct block_device *bdev,
				    unsigned int last_page_size,
				    unsigned int io_size)
{
	if (bdev_io_opt(bdev) <= bdev_logical_block_size(bdev))
		return io_size;

	return roundup(last_page_size, bdev_io_opt(bdev));
}
/*
 * Choose how many bytes to write at sector @start without running past sector
 * @boundary: prefer @opt_size when it differs from @io_size and fits, fall
 * back to @io_size when that fits, and return 0 when neither does.
 */
static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size,
				   loff_t start, loff_t boundary)
{
	loff_t opt_end = start + opt_size / SECTOR_SIZE;
	loff_t io_end = start + io_size / SECTOR_SIZE;

	if (io_size != opt_size && opt_end <= boundary)
		return opt_size;

	/* 0 means the write would overflow the boundary */
	return io_end <= boundary ? io_size : 0;
}
2023-02-24 11:33:21 -07:00
static int __write_sb_page ( struct md_rdev * rdev , struct bitmap * bitmap ,
2023-06-15 08:48:38 +02:00
unsigned long pg_index , struct page * page )
2005-06-21 17:17:27 -07:00
{
2011-01-14 09:14:34 +11:00
struct block_device * bdev ;
2011-10-11 16:47:53 +11:00
struct mddev * mddev = bitmap - > mddev ;
2012-05-22 13:55:10 +10:00
struct bitmap_storage * store = & bitmap - > storage ;
md/md-bitmap: fix writing non bitmap pages
__write_sb_page() rounds up the io size to the optimal io size if it
doesn't exceed the data offset, but it doesn't check the final size
exceeds the bitmap length.
For example:
page count - 1
page size - 4K
data offset - 1M
optimal io size - 256K
The final io size would be 256K (64 pages) but md_bitmap_storage_alloc()
allocated 1 page, the IO would write 1 valid page and 63 pages that
happens to be allocated afterwards. This leaks memory to the raid device
superblock.
This issue caused a data transfer failure in nvme-tcp. The network
drivers checks the first page of an IO with sendpage_ok(), it returns
true if the page isn't a slabpage and refcount >= 1. If the page
!sendpage_ok() the network driver disables MSG_SPLICE_PAGES.
As of now the network layer assumes all the pages of the IO are
sendpage_ok() when MSG_SPLICE_PAGES is on.
The bitmap pages aren't slab pages, the first page of the IO is
sendpage_ok(), but the additional pages that happens to be allocated
after the bitmap pages might be !sendpage_ok(). That cause
skb_splice_from_iter() to stop the data transfer, in the case below it
hangs 'mdadm --create'.
The bug is reproducible, in order to reproduce we need nvme-over-tcp
controllers with optimal IO size bigger than PAGE_SIZE. Creating a raid
with bitmap over those devices reproduces the bug.
In order to simulate large optimal IO size you can use dm-stripe with a
single device.
Script to reproduce the issue on top of brd devices using dm-stripe is
attached below (will be added to blktest).
I have added some logs to test the theory:
...
md: created bitmap (1 pages) for device md127
__write_sb_page before md_super_write offset: 16, size: 262144. pfn: 0x53ee
=== __write_sb_page before md_super_write. logging pages ===
pfn: 0x53ee, slab: 0 <-- the only page that allocated for the bitmap
pfn: 0x53ef, slab: 1
pfn: 0x53f0, slab: 0
pfn: 0x53f1, slab: 0
pfn: 0x53f2, slab: 0
pfn: 0x53f3, slab: 1
...
nvme_tcp: sendpage_ok - pfn: 0x53ee, len: 262144, offset: 0
skbuff: before sendpage_ok() - pfn: 0x53ee
skbuff: before sendpage_ok() - pfn: 0x53ef
WARNING at net/core/skbuff.c:6848 skb_splice_from_iter+0x142/0x450
skbuff: !sendpage_ok - pfn: 0x53ef. is_slab: 1, page_count: 1
...
Cc: stable@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ofir Gal <ofir.gal@volumez.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240607072748.3182199-1-ofir.gal@volumez.com
2024-06-07 10:27:44 +03:00
unsigned int bitmap_limit = ( bitmap - > storage . file_pages - pg_index ) < <
PAGE_SHIFT ;
2023-04-24 19:14:38 -06:00
loff_t sboff , offset = mddev - > bitmap_info . offset ;
2023-06-15 08:48:38 +02:00
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE ;
2023-02-24 11:33:22 -07:00
unsigned int size = PAGE_SIZE ;
2023-02-24 11:33:23 -07:00
unsigned int opt_size = PAGE_SIZE ;
2023-06-15 08:48:38 +02:00
sector_t doff ;
2005-06-21 17:17:27 -07:00
2023-02-24 11:33:21 -07:00
bdev = ( rdev - > meta_bdev ) ? rdev - > meta_bdev : rdev - > bdev ;
2024-02-23 20:11:28 +08:00
/* we compare length (page numbers), not page offset. */
if ( ( pg_index - store - > sb_index ) = = store - > file_pages - 1 ) {
2023-02-24 11:33:22 -07:00
unsigned int last_page_size = store - > bytes & ( PAGE_SIZE - 1 ) ;
2011-01-14 09:14:34 +11:00
2023-02-24 11:33:21 -07:00
if ( last_page_size = = 0 )
last_page_size = PAGE_SIZE ;
2023-02-24 11:33:23 -07:00
size = roundup ( last_page_size , bdev_logical_block_size ( bdev ) ) ;
opt_size = optimal_io_size ( bdev , last_page_size , size ) ;
2023-02-24 11:33:21 -07:00
}
2011-01-14 09:14:34 +11:00
2023-02-24 11:33:22 -07:00
sboff = rdev - > sb_start + offset ;
doff = rdev - > data_offset ;
2023-02-24 11:33:21 -07:00
/* Just make sure we aren't corrupting data or metadata */
if ( mddev - > external ) {
/* Bitmap could be anywhere. */
2023-02-24 11:33:22 -07:00
if ( sboff + ps > doff & &
sboff < ( doff + mddev - > dev_sectors + PAGE_SIZE / SECTOR_SIZE ) )
2023-02-24 11:33:21 -07:00
return - EINVAL ;
} else if ( offset < 0 ) {
/* DATA BITMAP METADATA */
2023-02-24 11:33:23 -07:00
size = bitmap_io_size ( size , opt_size , offset + ps , 0 ) ;
if ( size = = 0 )
2023-02-24 11:33:21 -07:00
/* bitmap runs in to metadata */
return - EINVAL ;
2023-02-24 11:33:22 -07:00
if ( doff + mddev - > dev_sectors > sboff )
2023-02-24 11:33:21 -07:00
/* data runs in to bitmap */
return - EINVAL ;
} else if ( rdev - > sb_start < rdev - > data_offset ) {
/* METADATA BITMAP DATA */
2023-02-24 11:33:23 -07:00
size = bitmap_io_size ( size , opt_size , sboff + ps , doff ) ;
if ( size = = 0 )
2023-02-24 11:33:21 -07:00
/* bitmap runs in to data */
return - EINVAL ;
2008-09-01 12:48:13 +10:00
}
2005-06-21 17:17:27 -07:00
md/md-bitmap: fix writing non bitmap pages
__write_sb_page() rounds up the io size to the optimal io size if it
doesn't exceed the data offset, but it doesn't check the final size
exceeds the bitmap length.
For example:
page count - 1
page size - 4K
data offset - 1M
optimal io size - 256K
The final io size would be 256K (64 pages) but md_bitmap_storage_alloc()
allocated 1 page, the IO would write 1 valid page and 63 pages that
happens to be allocated afterwards. This leaks memory to the raid device
superblock.
This issue caused a data transfer failure in nvme-tcp. The network
drivers checks the first page of an IO with sendpage_ok(), it returns
true if the page isn't a slabpage and refcount >= 1. If the page
!sendpage_ok() the network driver disables MSG_SPLICE_PAGES.
As of now the network layer assumes all the pages of the IO are
sendpage_ok() when MSG_SPLICE_PAGES is on.
The bitmap pages aren't slab pages, the first page of the IO is
sendpage_ok(), but the additional pages that happens to be allocated
after the bitmap pages might be !sendpage_ok(). That cause
skb_splice_from_iter() to stop the data transfer, in the case below it
hangs 'mdadm --create'.
The bug is reproducible, in order to reproduce we need nvme-over-tcp
controllers with optimal IO size bigger than PAGE_SIZE. Creating a raid
with bitmap over those devices reproduces the bug.
In order to simulate large optimal IO size you can use dm-stripe with a
single device.
Script to reproduce the issue on top of brd devices using dm-stripe is
attached below (will be added to blktest).
I have added some logs to test the theory:
...
md: created bitmap (1 pages) for device md127
__write_sb_page before md_super_write offset: 16, size: 262144. pfn: 0x53ee
=== __write_sb_page before md_super_write. logging pages ===
pfn: 0x53ee, slab: 0 <-- the only page that allocated for the bitmap
pfn: 0x53ef, slab: 1
pfn: 0x53f0, slab: 0
pfn: 0x53f1, slab: 0
pfn: 0x53f2, slab: 0
pfn: 0x53f3, slab: 1
...
nvme_tcp: sendpage_ok - pfn: 0x53ee, len: 262144, offset: 0
skbuff: before sendpage_ok() - pfn: 0x53ee
skbuff: before sendpage_ok() - pfn: 0x53ef
WARNING at net/core/skbuff.c:6848 skb_splice_from_iter+0x142/0x450
skbuff: !sendpage_ok - pfn: 0x53ef. is_slab: 1, page_count: 1
...
Cc: stable@vger.kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ofir Gal <ofir.gal@volumez.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240607072748.3182199-1-ofir.gal@volumez.com
2024-06-07 10:27:44 +03:00
md_super_write ( mddev , rdev , sboff + ps , ( int ) min ( size , bitmap_limit ) , page ) ;
2005-06-21 17:17:27 -07:00
return 0 ;
2023-02-24 11:33:21 -07:00
}
2008-07-21 17:05:25 +10:00
2023-06-15 08:48:38 +02:00
/*
 * Write bitmap page @pg_index to every usable member device.
 *
 * With @wait set, the whole pass is repeated for as long as md_super_wait()
 * reports failure (negative return). If queuing the write to any device
 * fails, BITMAP_WRITE_ERROR is set in bitmap->flags and the function gives up.
 */
static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index,
			  struct page *page, bool wait)
{
	struct mddev *mddev = bitmap->mddev;

	do {
		struct md_rdev *rdev = NULL;

		while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
			if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) {
				/*
				 * next_active_rdev() raised nr_pending on this
				 * rdev and only drops it when called again;
				 * since we stop iterating here, drop it
				 * ourselves so the count stays balanced.
				 */
				rdev_dec_pending(rdev, mddev);
				set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
				return;
			}
		}
	} while (wait && md_super_wait(mddev) < 0);
}
2018-08-01 15:20:50 -07:00
static void md_bitmap_file_kick ( struct bitmap * bitmap ) ;
2005-06-21 17:17:14 -07:00
2023-06-15 08:48:39 +02:00
# ifdef CONFIG_MD_BITMAP_FILE
2023-06-15 08:48:33 +02:00
static void write_file_page ( struct bitmap * bitmap , struct page * page , int wait )
{
struct buffer_head * bh = page_buffers ( page ) ;
2006-06-26 00:27:48 -07:00
2023-06-15 08:48:33 +02:00
while ( bh & & bh - > b_blocknr ) {
atomic_inc ( & bitmap - > pending_writes ) ;
set_buffer_locked ( bh ) ;
set_buffer_mapped ( bh ) ;
submit_bh ( REQ_OP_WRITE | REQ_SYNC , bh ) ;
bh = bh - > b_this_page ;
2005-06-21 17:17:29 -07:00
}
2023-06-15 08:48:33 +02:00
if ( wait )
wait_event ( bitmap - > write_wait ,
atomic_read ( & bitmap - > pending_writes ) = = 0 ) ;
2006-06-26 00:27:48 -07:00
}
static void end_bitmap_write ( struct buffer_head * bh , int uptodate )
{
struct bitmap * bitmap = bh - > b_private ;
2005-06-21 17:17:14 -07:00
2012-05-22 13:55:15 +10:00
if ( ! uptodate )
set_bit ( BITMAP_WRITE_ERROR , & bitmap - > flags ) ;
2006-06-26 00:27:48 -07:00
if ( atomic_dec_and_test ( & bitmap - > pending_writes ) )
wake_up ( & bitmap - > write_wait ) ;
}
2005-06-21 17:17:14 -07:00
2006-06-26 00:27:48 -07:00
static void free_buffers ( struct page * page )
{
2012-05-22 13:55:08 +10:00
struct buffer_head * bh ;
2005-06-21 17:17:21 -07:00
2012-05-22 13:55:08 +10:00
if ( ! PagePrivate ( page ) )
return ;
bh = page_buffers ( page ) ;
2006-06-26 00:27:48 -07:00
while ( bh ) {
struct buffer_head * next = bh - > b_this_page ;
free_buffer_head ( bh ) ;
bh = next ;
2005-06-21 17:17:21 -07:00
}
2020-06-01 21:47:42 -07:00
detach_page_private ( page ) ;
2006-06-26 00:27:48 -07:00
put_page ( page ) ;
2005-06-21 17:17:14 -07:00
}
2006-06-26 00:27:48 -07:00
/* read a page from a file.
* We both read the page , and attach buffers to the page to record the
* address of each block ( using bmap ) . These addresses will be used
* to write the block later , completely bypassing the filesystem .
* This usage is similar to how swap files are handled , and allows us
* to write to a file with no concerns of memory allocation failing .
*/
2023-06-15 08:48:34 +02:00
static int read_file_page ( struct file * file , unsigned long index ,
struct bitmap * bitmap , unsigned long count , struct page * page )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:08 +10:00
int ret = 0 ;
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( file ) ;
2006-06-26 00:27:48 -07:00
struct buffer_head * bh ;
2020-01-09 14:30:41 +01:00
sector_t block , blk_cur ;
2020-08-18 13:42:06 +08:00
unsigned long blocksize = i_blocksize ( inode ) ;
2005-06-21 17:17:14 -07:00
2011-10-07 14:23:17 +11:00
pr_debug ( " read bitmap file (%dB @ %llu) \n " , ( int ) PAGE_SIZE ,
( unsigned long long ) index < < PAGE_SHIFT ) ;
2005-06-21 17:17:14 -07:00
2020-08-18 13:42:06 +08:00
bh = alloc_page_buffers ( page , blocksize , false ) ;
2006-06-26 00:27:48 -07:00
if ( ! bh ) {
2012-05-22 13:55:08 +10:00
ret = - ENOMEM ;
2005-06-21 17:17:14 -07:00
goto out ;
}
2020-06-01 21:47:42 -07:00
attach_page_private ( page , bh ) ;
2020-01-09 14:30:41 +01:00
blk_cur = index < < ( PAGE_SHIFT - inode - > i_blkbits ) ;
2006-06-26 00:27:48 -07:00
while ( bh ) {
2020-01-09 14:30:41 +01:00
block = blk_cur ;
2006-06-26 00:27:48 -07:00
if ( count = = 0 )
bh - > b_blocknr = 0 ;
else {
2020-01-09 14:30:41 +01:00
ret = bmap ( inode , & block ) ;
if ( ret | | ! block ) {
2012-05-22 13:55:08 +10:00
ret = - EINVAL ;
2020-01-09 14:30:41 +01:00
bh - > b_blocknr = 0 ;
2006-06-26 00:27:48 -07:00
goto out ;
}
2020-01-09 14:30:41 +01:00
bh - > b_blocknr = block ;
2006-06-26 00:27:48 -07:00
bh - > b_bdev = inode - > i_sb - > s_bdev ;
2020-08-18 13:42:06 +08:00
if ( count < blocksize )
2006-06-26 00:27:48 -07:00
count = 0 ;
else
2020-08-18 13:42:06 +08:00
count - = blocksize ;
2006-06-26 00:27:48 -07:00
bh - > b_end_io = end_bitmap_write ;
bh - > b_private = bitmap ;
2006-06-26 00:27:49 -07:00
atomic_inc ( & bitmap - > pending_writes ) ;
set_buffer_locked ( bh ) ;
set_buffer_mapped ( bh ) ;
2022-07-14 11:07:13 -07:00
submit_bh ( REQ_OP_READ , bh ) ;
2006-06-26 00:27:48 -07:00
}
2020-01-09 14:30:41 +01:00
blk_cur + + ;
2006-06-26 00:27:48 -07:00
bh = bh - > b_this_page ;
}
2006-06-26 00:27:49 -07:00
wait_event ( bitmap - > write_wait ,
atomic_read ( & bitmap - > pending_writes ) = = 0 ) ;
2012-05-22 13:55:15 +10:00
if ( test_bit ( BITMAP_WRITE_ERROR , & bitmap - > flags ) )
2012-05-22 13:55:08 +10:00
ret = - EIO ;
2005-06-21 17:17:14 -07:00
out :
2012-05-22 13:55:08 +10:00
if ( ret )
2016-11-02 14:16:49 +11:00
pr_err ( " md: bitmap read error: (%dB @ %llu): %d \n " ,
( int ) PAGE_SIZE ,
( unsigned long long ) index < < PAGE_SHIFT ,
ret ) ;
2012-05-22 13:55:08 +10:00
return ret ;
2005-06-21 17:17:14 -07:00
}
2023-06-15 08:48:39 +02:00
# else /* CONFIG_MD_BITMAP_FILE */
/* Stub used when CONFIG_MD_BITMAP_FILE is not defined: nothing is written. */
static void write_file_page ( struct bitmap * bitmap , struct page * page , int wait )
{
}
/* Stub used when CONFIG_MD_BITMAP_FILE is not defined: always fails with -EIO. */
static int read_file_page ( struct file * file , unsigned long index ,
struct bitmap * bitmap , unsigned long count , struct page * page )
{
return - EIO ;
}
/*
 * Stub used when CONFIG_MD_BITMAP_FILE is not defined: there are no buffer
 * heads to release, so only the page reference is dropped.
 */
static void free_buffers ( struct page * page )
{
put_page ( page ) ;
}
# endif /* CONFIG_MD_BITMAP_FILE */
2005-06-21 17:17:14 -07:00
/*
* bitmap file superblock operations
*/
2023-06-15 08:48:33 +02:00
/*
* write out a page to a file
*/
2023-06-15 08:48:38 +02:00
static void filemap_write_page ( struct bitmap * bitmap , unsigned long pg_index ,
bool wait )
2023-06-15 08:48:33 +02:00
{
2023-06-15 08:48:38 +02:00
struct bitmap_storage * store = & bitmap - > storage ;
struct page * page = store - > filemap [ pg_index ] ;
if ( mddev_is_clustered ( bitmap - > mddev ) ) {
2024-02-23 20:11:28 +08:00
/* go to node bitmap area starting point */
pg_index + = store - > sb_index ;
2023-06-15 08:48:38 +02:00
}
if ( store - > file )
2023-06-15 08:48:33 +02:00
write_file_page ( bitmap , page , wait ) ;
else
2023-06-15 08:48:38 +02:00
write_sb_page ( bitmap , pg_index , page , wait ) ;
2023-06-15 08:48:33 +02:00
}
2016-11-04 16:46:03 +11:00
/*
2018-08-01 15:20:50 -07:00
* md_bitmap_wait_writes ( ) should be called before writing any bitmap
2016-11-04 16:46:03 +11:00
* blocks , to ensure previous writes , particularly from
2018-08-01 15:20:50 -07:00
* md_bitmap_daemon_work ( ) , have completed .
2016-11-04 16:46:03 +11:00
*/
2018-08-01 15:20:50 -07:00
static void md_bitmap_wait_writes ( struct bitmap * bitmap )
2016-11-04 16:46:03 +11:00
{
if ( bitmap - > storage . file )
wait_event ( bitmap - > write_wait ,
atomic_read ( & bitmap - > pending_writes ) = = 0 ) ;
else
2016-11-18 16:16:11 +11:00
/* Note that we ignore the return value. The writes
* might have failed , but that would just mean that
* some bits which should be cleared haven ' t been ,
* which is safe . The relevant bitmap blocks will
* probably get written again , but there is no great
* loss if they aren ' t .
*/
2016-11-04 16:46:03 +11:00
md_super_wait ( bitmap - > mddev ) ;
}
2005-06-21 17:17:14 -07:00
/* update the event counter and sync the superblock to disk */
2018-08-01 15:20:50 -07:00
void md_bitmap_update_sb ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
bitmap_super_t * sb ;
if ( ! bitmap | | ! bitmap - > mddev ) /* no bitmap for this array */
2007-07-17 04:06:13 -07:00
return ;
2009-12-14 12:49:56 +11:00
if ( bitmap - > mddev - > bitmap_info . external )
return ;
2012-05-22 13:55:10 +10:00
if ( ! bitmap - > storage . sb_page ) /* no superblock */
2007-07-17 04:06:13 -07:00
return ;
2012-05-22 13:55:10 +10:00
sb = kmap_atomic ( bitmap - > storage . sb_page ) ;
2005-06-21 17:17:14 -07:00
sb - > events = cpu_to_le64 ( bitmap - > mddev - > events ) ;
2011-05-11 14:26:30 +10:00
if ( bitmap - > mddev - > events < bitmap - > events_cleared )
2008-06-28 08:31:22 +10:00
/* rocking back to read-only */
bitmap - > events_cleared = bitmap - > mddev - > events ;
2011-05-11 14:26:30 +10:00
sb - > events_cleared = cpu_to_le64 ( bitmap - > events_cleared ) ;
2017-11-06 10:11:25 +08:00
/*
* clear BITMAP_WRITE_ERROR bit to protect against the case that
* a bitmap write error occurred but the later writes succeeded .
*/
sb - > state = cpu_to_le32 ( bitmap - > flags & ~ BIT ( BITMAP_WRITE_ERROR ) ) ;
2009-12-14 12:49:55 +11:00
/* Just in case these have been changed via sysfs: */
sb - > daemon_sleep = cpu_to_le32 ( bitmap - > mddev - > bitmap_info . daemon_sleep / HZ ) ;
sb - > write_behind = cpu_to_le32 ( bitmap - > mddev - > bitmap_info . max_write_behind ) ;
2012-05-22 13:55:26 +10:00
/* This might have been changed by a reshape */
sb - > sync_size = cpu_to_le64 ( bitmap - > mddev - > resync_max_sectors ) ;
sb - > chunksize = cpu_to_le32 ( bitmap - > mddev - > bitmap_info . chunksize ) ;
2014-03-29 10:20:02 -05:00
sb - > nodes = cpu_to_le32 ( bitmap - > mddev - > bitmap_info . nodes ) ;
2012-05-22 13:55:34 +10:00
sb - > sectors_reserved = cpu_to_le32 ( bitmap - > mddev - >
bitmap_info . space ) ;
2011-11-28 13:25:44 +08:00
kunmap_atomic ( sb ) ;
2023-06-15 08:48:38 +02:00
if ( bitmap - > storage . file )
write_file_page ( bitmap , bitmap - > storage . sb_page , 1 ) ;
else
write_sb_page ( bitmap , bitmap - > storage . sb_index ,
bitmap - > storage . sb_page , 1 ) ;
2005-06-21 17:17:14 -07:00
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_update_sb ) ;
2005-06-21 17:17:14 -07:00
/* print out the bitmap file superblock */
2018-08-01 15:20:50 -07:00
void md_bitmap_print_sb ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
bitmap_super_t * sb ;
2012-05-22 13:55:10 +10:00
if ( ! bitmap | | ! bitmap - > storage . sb_page )
2005-06-21 17:17:14 -07:00
return ;
2012-05-22 13:55:10 +10:00
sb = kmap_atomic ( bitmap - > storage . sb_page ) ;
2016-11-02 14:16:49 +11:00
pr_debug ( " %s: bitmap file superblock: \n " , bmname ( bitmap ) ) ;
pr_debug ( " magic: %08x \n " , le32_to_cpu ( sb - > magic ) ) ;
2022-10-25 09:37:05 +02:00
pr_debug ( " version: %u \n " , le32_to_cpu ( sb - > version ) ) ;
2016-11-02 14:16:49 +11:00
pr_debug ( " uuid: %08x.%08x.%08x.%08x \n " ,
2019-04-04 18:56:11 +02:00
le32_to_cpu ( * ( __le32 * ) ( sb - > uuid + 0 ) ) ,
le32_to_cpu ( * ( __le32 * ) ( sb - > uuid + 4 ) ) ,
le32_to_cpu ( * ( __le32 * ) ( sb - > uuid + 8 ) ) ,
le32_to_cpu ( * ( __le32 * ) ( sb - > uuid + 12 ) ) ) ;
2016-11-02 14:16:49 +11:00
pr_debug ( " events: %llu \n " ,
( unsigned long long ) le64_to_cpu ( sb - > events ) ) ;
pr_debug ( " events cleared: %llu \n " ,
( unsigned long long ) le64_to_cpu ( sb - > events_cleared ) ) ;
pr_debug ( " state: %08x \n " , le32_to_cpu ( sb - > state ) ) ;
2022-10-25 09:37:05 +02:00
pr_debug ( " chunksize: %u B \n " , le32_to_cpu ( sb - > chunksize ) ) ;
pr_debug ( " daemon sleep: %us \n " , le32_to_cpu ( sb - > daemon_sleep ) ) ;
2016-11-02 14:16:49 +11:00
pr_debug ( " sync size: %llu KB \n " ,
( unsigned long long ) le64_to_cpu ( sb - > sync_size ) / 2 ) ;
2022-10-25 09:37:05 +02:00
pr_debug ( " max write behind: %u \n " , le32_to_cpu ( sb - > write_behind ) ) ;
2011-11-28 13:25:44 +08:00
kunmap_atomic ( sb ) ;
2005-06-21 17:17:14 -07:00
}
2011-06-08 17:59:30 -05:00
/*
* bitmap_new_disk_sb
* @ bitmap
*
* This function is somewhat the reverse of bitmap_read_sb . bitmap_read_sb
* reads and verifies the on - disk bitmap superblock and populates bitmap_info .
* This function verifies ' bitmap_info ' and populates the on - disk bitmap
* structure , which is to be written to disk .
*
* Returns : 0 on success , - Exxx on error
*/
2018-08-01 15:20:50 -07:00
static int md_bitmap_new_disk_sb ( struct bitmap * bitmap )
2011-06-08 17:59:30 -05:00
{
bitmap_super_t * sb ;
unsigned long chunksize , daemon_sleep , write_behind ;
2015-07-22 12:09:17 -05:00
bitmap - > storage . sb_page = alloc_page ( GFP_KERNEL | __GFP_ZERO ) ;
2012-10-11 13:45:36 +11:00
if ( bitmap - > storage . sb_page = = NULL )
return - ENOMEM ;
2023-06-15 08:48:38 +02:00
bitmap - > storage . sb_index = 0 ;
2011-06-08 17:59:30 -05:00
2012-05-22 13:55:10 +10:00
sb = kmap_atomic ( bitmap - > storage . sb_page ) ;
2011-06-08 17:59:30 -05:00
sb - > magic = cpu_to_le32 ( BITMAP_MAGIC ) ;
sb - > version = cpu_to_le32 ( BITMAP_MAJOR_HI ) ;
chunksize = bitmap - > mddev - > bitmap_info . chunksize ;
BUG_ON ( ! chunksize ) ;
if ( ! is_power_of_2 ( chunksize ) ) {
2011-11-28 13:25:44 +08:00
kunmap_atomic ( sb ) ;
2016-11-02 14:16:49 +11:00
pr_warn ( " bitmap chunksize not a power of 2 \n " ) ;
2011-06-08 17:59:30 -05:00
return - EINVAL ;
}
sb - > chunksize = cpu_to_le32 ( chunksize ) ;
daemon_sleep = bitmap - > mddev - > bitmap_info . daemon_sleep ;
2016-03-07 12:01:05 +00:00
if ( ! daemon_sleep | | ( daemon_sleep > MAX_SCHEDULE_TIMEOUT ) ) {
2016-11-02 14:16:49 +11:00
pr_debug ( " Choosing daemon_sleep default (5 sec) \n " ) ;
2011-06-08 17:59:30 -05:00
daemon_sleep = 5 * HZ ;
}
sb - > daemon_sleep = cpu_to_le32 ( daemon_sleep ) ;
bitmap - > mddev - > bitmap_info . daemon_sleep = daemon_sleep ;
/*
* FIXME : write_behind for RAID1 . If not specified , what
* is a good choice ? We choose COUNTER_MAX / 2 arbitrarily .
*/
write_behind = bitmap - > mddev - > bitmap_info . max_write_behind ;
if ( write_behind > COUNTER_MAX )
write_behind = COUNTER_MAX / 2 ;
sb - > write_behind = cpu_to_le32 ( write_behind ) ;
bitmap - > mddev - > bitmap_info . max_write_behind = write_behind ;
/* keep the array size field of the bitmap superblock up to date */
sb - > sync_size = cpu_to_le64 ( bitmap - > mddev - > resync_max_sectors ) ;
memcpy ( sb - > uuid , bitmap - > mddev - > uuid , 16 ) ;
2012-05-22 13:55:15 +10:00
set_bit ( BITMAP_STALE , & bitmap - > flags ) ;
2012-05-22 13:55:14 +10:00
sb - > state = cpu_to_le32 ( bitmap - > flags ) ;
2011-06-08 17:59:30 -05:00
bitmap - > events_cleared = bitmap - > mddev - > events ;
sb - > events_cleared = cpu_to_le64 ( bitmap - > mddev - > events ) ;
2015-07-22 12:09:17 -05:00
bitmap - > mddev - > bitmap_info . nodes = 0 ;
2011-06-08 17:59:30 -05:00
2011-11-28 13:25:44 +08:00
kunmap_atomic ( sb ) ;
2011-06-08 17:59:30 -05:00
return 0 ;
}
2005-06-21 17:17:14 -07:00
/* read the superblock from the bitmap file and initialize some bitmap fields */
2018-08-01 15:20:50 -07:00
static int md_bitmap_read_sb ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
char * reason = NULL ;
bitmap_super_t * sb ;
2005-09-09 16:23:47 -07:00
unsigned long chunksize , daemon_sleep , write_behind ;
2005-06-21 17:17:14 -07:00
unsigned long long events ;
2014-03-29 10:20:02 -05:00
int nodes = 0 ;
2012-05-22 13:55:34 +10:00
unsigned long sectors_reserved = 0 ;
2005-06-21 17:17:14 -07:00
int err = - EINVAL ;
2012-05-22 13:55:08 +10:00
struct page * sb_page ;
2023-06-15 08:48:37 +02:00
loff_t offset = 0 ;
2005-06-21 17:17:14 -07:00
2012-05-22 13:55:10 +10:00
if ( ! bitmap - > storage . file & & ! bitmap - > mddev - > bitmap_info . offset ) {
2012-05-22 13:55:08 +10:00
chunksize = 128 * 1024 * 1024 ;
daemon_sleep = 5 * HZ ;
write_behind = 0 ;
2012-05-22 13:55:15 +10:00
set_bit ( BITMAP_STALE , & bitmap - > flags ) ;
2012-05-22 13:55:08 +10:00
err = 0 ;
goto out_no_sb ;
}
2005-06-21 17:17:14 -07:00
/* page 0 is the superblock, read it... */
2012-05-22 13:55:08 +10:00
sb_page = alloc_page ( GFP_KERNEL ) ;
if ( ! sb_page )
return - ENOMEM ;
2012-05-22 13:55:10 +10:00
bitmap - > storage . sb_page = sb_page ;
2012-05-22 13:55:08 +10:00
2014-06-06 11:50:56 -05:00
re_read :
2014-06-06 12:43:49 -05:00
/* If cluster_slot is set, the cluster is setup */
if ( bitmap - > cluster_slot > = 0 ) {
2015-03-03 13:35:31 +11:00
sector_t bm_blocks = bitmap - > mddev - > resync_max_sectors ;
2014-06-06 12:43:49 -05:00
2020-10-06 00:00:23 +08:00
bm_blocks = DIV_ROUND_UP_SECTOR_T ( bm_blocks ,
( bitmap - > mddev - > bitmap_info . chunksize > > 9 ) ) ;
2015-03-24 11:29:05 -05:00
/* bits to bytes */
bm_blocks = ( ( bm_blocks + 7 ) > > 3 ) + sizeof ( bitmap_super_t ) ;
/* to 4k blocks */
2015-03-02 17:02:29 +11:00
bm_blocks = DIV_ROUND_UP_SECTOR_T ( bm_blocks , 4096 ) ;
2023-06-15 08:48:37 +02:00
offset = bitmap - > cluster_slot * ( bm_blocks < < 3 ) ;
2016-11-02 14:16:49 +11:00
pr_debug ( " %s:%d bm slot: %d offset: %llu \n " , __func__ , __LINE__ ,
2015-07-01 12:19:56 +10:00
bitmap - > cluster_slot , offset ) ;
2014-06-06 12:43:49 -05:00
}
2012-05-22 13:55:10 +10:00
if ( bitmap - > storage . file ) {
loff_t isize = i_size_read ( bitmap - > storage . file - > f_mapping - > host ) ;
2007-01-26 00:57:03 -08:00
int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize ;
2023-06-15 08:48:34 +02:00
err = read_file_page ( bitmap - > storage . file , 0 ,
2012-05-22 13:55:08 +10:00
bitmap , bytes , sb_page ) ;
2007-01-26 00:57:03 -08:00
} else {
2023-06-15 08:48:37 +02:00
err = read_sb_page ( bitmap - > mddev , offset , sb_page , 0 ,
sizeof ( bitmap_super_t ) ) ;
2005-06-21 17:17:27 -07:00
}
2012-05-22 13:55:08 +10:00
if ( err )
2005-06-21 17:17:14 -07:00
return err ;
2014-06-06 11:50:56 -05:00
err = - EINVAL ;
2012-05-22 13:55:08 +10:00
sb = kmap_atomic ( sb_page ) ;
2005-06-21 17:17:14 -07:00
chunksize = le32_to_cpu ( sb - > chunksize ) ;
2009-12-14 12:49:53 +11:00
daemon_sleep = le32_to_cpu ( sb - > daemon_sleep ) * HZ ;
2005-09-09 16:23:47 -07:00
write_behind = le32_to_cpu ( sb - > write_behind ) ;
2012-05-22 13:55:34 +10:00
sectors_reserved = le32_to_cpu ( sb - > sectors_reserved ) ;
2005-06-21 17:17:14 -07:00
/* verify that the bitmap-specific fields are valid */
if ( sb - > magic ! = cpu_to_le32 ( BITMAP_MAGIC ) )
reason = " bad magic " ;
2005-11-08 21:39:32 -08:00
else if ( le32_to_cpu ( sb - > version ) < BITMAP_MAJOR_LO | |
2015-08-19 07:35:54 +10:00
le32_to_cpu ( sb - > version ) > BITMAP_MAJOR_CLUSTERED )
2005-06-21 17:17:14 -07:00
reason = " unrecognized superblock version " ;
2009-03-31 14:27:02 +11:00
else if ( chunksize < 512 )
2006-01-06 00:20:39 -08:00
reason = " bitmap chunksize too small " ;
2011-06-08 18:01:10 -05:00
else if ( ! is_power_of_2 ( chunksize ) )
2005-06-21 17:17:14 -07:00
reason = " bitmap chunksize not a power of 2 " ;
2009-12-14 12:49:53 +11:00
else if ( daemon_sleep < 1 | | daemon_sleep > MAX_SCHEDULE_TIMEOUT )
2006-01-06 00:20:39 -08:00
reason = " daemon sleep period out of range " ;
2005-09-09 16:23:47 -07:00
else if ( write_behind > COUNTER_MAX )
reason = " write-behind limit out of range (0 - 16383) " ;
2005-06-21 17:17:14 -07:00
if ( reason ) {
2016-11-02 14:16:49 +11:00
pr_warn ( " %s: invalid bitmap file superblock: %s \n " ,
2005-06-21 17:17:14 -07:00
bmname ( bitmap ) , reason ) ;
goto out ;
}
2022-04-01 10:13:16 +08:00
/*
* Setup nodes / clustername only if bitmap version is
* cluster - compatible
*/
if ( sb - > version = = cpu_to_le32 ( BITMAP_MAJOR_CLUSTERED ) ) {
nodes = le32_to_cpu ( sb - > nodes ) ;
2022-04-01 10:13:17 +08:00
strscpy ( bitmap - > mddev - > bitmap_info . cluster_name ,
2022-04-01 10:13:16 +08:00
sb - > cluster_name , 64 ) ;
}
2005-06-21 17:17:14 -07:00
/* keep the array size field of the bitmap superblock up to date */
sb - > sync_size = cpu_to_le64 ( bitmap - > mddev - > resync_max_sectors ) ;
2012-03-19 12:46:40 +11:00
if ( bitmap - > mddev - > persistent ) {
/*
* We have a persistent array superblock , so compare the
* bitmap ' s UUID and event counter to the mddev ' s
*/
if ( memcmp ( sb - > uuid , bitmap - > mddev - > uuid , 16 ) ) {
2016-11-02 14:16:49 +11:00
pr_warn ( " %s: bitmap superblock UUID mismatch \n " ,
bmname ( bitmap ) ) ;
2012-03-19 12:46:40 +11:00
goto out ;
}
events = le64_to_cpu ( sb - > events ) ;
2014-06-06 11:50:56 -05:00
if ( ! nodes & & ( events < bitmap - > mddev - > events ) ) {
2016-11-02 14:16:49 +11:00
pr_warn ( " %s: bitmap file is out of date (%llu < %llu) -- forcing full recovery \n " ,
bmname ( bitmap ) , events ,
( unsigned long long ) bitmap - > mddev - > events ) ;
2012-05-22 13:55:15 +10:00
set_bit ( BITMAP_STALE , & bitmap - > flags ) ;
2012-03-19 12:46:40 +11:00
}
2005-06-21 17:17:14 -07:00
}
2012-03-19 12:46:40 +11:00
2005-06-21 17:17:14 -07:00
/* assign fields using values from superblock */
2006-10-21 10:24:09 -07:00
bitmap - > flags | = le32_to_cpu ( sb - > state ) ;
2005-11-08 21:39:32 -08:00
if ( le32_to_cpu ( sb - > version ) = = BITMAP_MAJOR_HOSTENDIAN )
2012-05-22 13:55:15 +10:00
set_bit ( BITMAP_HOSTENDIAN , & bitmap - > flags ) ;
2005-06-21 17:17:14 -07:00
bitmap - > events_cleared = le64_to_cpu ( sb - > events_cleared ) ;
err = 0 ;
2014-06-06 11:50:56 -05:00
2005-06-21 17:17:14 -07:00
out :
2011-11-28 13:25:44 +08:00
kunmap_atomic ( sb ) ;
2015-07-22 12:09:16 -05:00
if ( err = = 0 & & nodes & & ( bitmap - > cluster_slot < 0 ) ) {
2022-04-01 10:13:16 +08:00
/* Assigning chunksize is required for "re_read" */
bitmap - > mddev - > bitmap_info . chunksize = chunksize ;
2014-06-06 11:50:56 -05:00
err = md_setup_cluster ( bitmap - > mddev , nodes ) ;
if ( err ) {
2016-11-02 14:16:49 +11:00
pr_warn ( " %s: Could not setup cluster service (%d) \n " ,
bmname ( bitmap ) , err ) ;
2014-06-06 11:50:56 -05:00
goto out_no_sb ;
}
bitmap - > cluster_slot = md_cluster_ops - > slot_number ( bitmap - > mddev ) ;
goto re_read ;
}
2012-05-22 13:55:08 +10:00
out_no_sb :
2022-04-01 10:13:16 +08:00
if ( err = = 0 ) {
if ( test_bit ( BITMAP_STALE , & bitmap - > flags ) )
bitmap - > events_cleared = bitmap - > mddev - > events ;
bitmap - > mddev - > bitmap_info . chunksize = chunksize ;
bitmap - > mddev - > bitmap_info . daemon_sleep = daemon_sleep ;
bitmap - > mddev - > bitmap_info . max_write_behind = write_behind ;
bitmap - > mddev - > bitmap_info . nodes = nodes ;
if ( bitmap - > mddev - > bitmap_info . space = = 0 | |
bitmap - > mddev - > bitmap_info . space > sectors_reserved )
bitmap - > mddev - > bitmap_info . space = sectors_reserved ;
} else {
2018-08-01 15:20:50 -07:00
md_bitmap_print_sb ( bitmap ) ;
2014-06-06 12:43:49 -05:00
if ( bitmap - > cluster_slot < 0 )
2014-06-06 11:50:56 -05:00
md_cluster_stop ( bitmap - > mddev ) ;
}
2005-06-21 17:17:14 -07:00
return err ;
}
/*
* general bitmap file operations
*/
2009-12-14 12:49:56 +11:00
/*
* on - disk bitmap :
*
* Use one bit per " chunk " ( block set ) . We do the disk I / O on the bitmap
* file a page at a time . There ' s a superblock at the start of the file .
*/
2005-06-21 17:17:14 -07:00
/* calculate the index of the page that contains this bit */
2012-05-22 13:55:10 +10:00
static inline unsigned long file_page_index ( struct bitmap_storage * store ,
unsigned long chunk )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:10 +10:00
if ( store - > sb_page )
2009-12-14 12:49:56 +11:00
chunk + = sizeof ( bitmap_super_t ) < < 3 ;
return chunk > > PAGE_BIT_SHIFT ;
2005-06-21 17:17:14 -07:00
}
/* calculate the (bit) offset of this bit within a page */
2012-05-22 13:55:10 +10:00
static inline unsigned long file_page_offset ( struct bitmap_storage * store ,
unsigned long chunk )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:10 +10:00
if ( store - > sb_page )
2009-12-14 12:49:56 +11:00
chunk + = sizeof ( bitmap_super_t ) < < 3 ;
return chunk & ( PAGE_BITS - 1 ) ;
2005-06-21 17:17:14 -07:00
}
/*
* return a pointer to the page in the filemap that contains the given bit
*
*/
2012-05-22 13:55:10 +10:00
static inline struct page * filemap_get_page ( struct bitmap_storage * store ,
2011-07-27 11:00:37 +10:00
unsigned long chunk )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:10 +10:00
if ( file_page_index ( store , chunk ) > = store - > file_pages )
2010-06-01 19:37:31 +10:00
return NULL ;
2014-05-28 13:39:23 +10:00
return store - > filemap [ file_page_index ( store , chunk ) ] ;
2005-06-21 17:17:14 -07:00
}
2018-08-01 15:20:50 -07:00
static int md_bitmap_storage_alloc ( struct bitmap_storage * store ,
unsigned long chunks , int with_super ,
int slot_number )
2012-05-22 13:55:12 +10:00
{
2014-06-06 11:50:56 -05:00
int pnum , offset = 0 ;
2012-05-22 13:55:12 +10:00
unsigned long num_pages ;
unsigned long bytes ;
bytes = DIV_ROUND_UP ( chunks , 8 ) ;
if ( with_super )
bytes + = sizeof ( bitmap_super_t ) ;
num_pages = DIV_ROUND_UP ( bytes , PAGE_SIZE ) ;
2016-05-02 11:50:13 -04:00
offset = slot_number * num_pages ;
2012-05-22 13:55:12 +10:00
treewide: kmalloc() -> kmalloc_array()
The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
patch replaces cases of:
kmalloc(a * b, gfp)
with:
kmalloc_array(a * b, gfp)
as well as handling cases of:
kmalloc(a * b * c, gfp)
with:
kmalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kmalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kmalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The tools/ directory was manually excluded, since it has its own
implementation of kmalloc().
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kmalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kmalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kmalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kmalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kmalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kmalloc
+ kmalloc_array
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kmalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kmalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kmalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kmalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kmalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kmalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kmalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kmalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kmalloc(C1 * C2 * C3, ...)
|
kmalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kmalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kmalloc(sizeof(THING) * C2, ...)
|
kmalloc(sizeof(TYPE) * C2, ...)
|
kmalloc(C1 * C2 * C3, ...)
|
kmalloc(C1 * C2, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kmalloc
+ kmalloc_array
(
- (E1) * E2
+ E1, E2
, ...)
|
- kmalloc
+ kmalloc_array
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kmalloc
+ kmalloc_array
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 13:55:00 -07:00
store - > filemap = kmalloc_array ( num_pages , sizeof ( struct page * ) ,
GFP_KERNEL ) ;
2012-05-22 13:55:12 +10:00
if ( ! store - > filemap )
return - ENOMEM ;
if ( with_super & & ! store - > sb_page ) {
2012-05-22 13:55:25 +10:00
store - > sb_page = alloc_page ( GFP_KERNEL | __GFP_ZERO ) ;
2012-05-22 13:55:12 +10:00
if ( store - > sb_page = = NULL )
return - ENOMEM ;
}
2014-06-06 11:50:56 -05:00
2012-05-22 13:55:12 +10:00
pnum = 0 ;
if ( store - > sb_page ) {
store - > filemap [ 0 ] = store - > sb_page ;
pnum = 1 ;
2023-06-15 08:48:38 +02:00
store - > sb_index = offset ;
2012-05-22 13:55:12 +10:00
}
2014-06-06 11:50:56 -05:00
2012-05-22 13:55:12 +10:00
for ( ; pnum < num_pages ; pnum + + ) {
2012-05-22 13:55:25 +10:00
store - > filemap [ pnum ] = alloc_page ( GFP_KERNEL | __GFP_ZERO ) ;
2012-05-22 13:55:12 +10:00
if ( ! store - > filemap [ pnum ] ) {
store - > file_pages = pnum ;
return - ENOMEM ;
}
}
store - > file_pages = pnum ;
/* We need 4 bits per page, rounded up to a multiple
* of sizeof ( unsigned long ) */
store - > filemap_attr = kzalloc (
roundup ( DIV_ROUND_UP ( num_pages * 4 , 8 ) , sizeof ( unsigned long ) ) ,
GFP_KERNEL ) ;
if ( ! store - > filemap_attr )
return - ENOMEM ;
store - > bytes = bytes ;
return 0 ;
}
2018-08-01 15:20:50 -07:00
static void md_bitmap_file_unmap ( struct bitmap_storage * store )
2005-06-21 17:17:14 -07:00
{
2023-06-15 08:48:31 +02:00
struct file * file = store - > file ;
struct page * sb_page = store - > sb_page ;
struct page * * map = store - > filemap ;
int pages = store - > file_pages ;
2005-06-21 17:17:14 -07:00
while ( pages - - )
2009-12-14 12:49:56 +11:00
if ( map [ pages ] ! = sb_page ) /* 0 is sb_page, release it below */
2006-06-26 00:27:48 -07:00
free_buffers ( map [ pages ] ) ;
2005-06-21 17:17:14 -07:00
kfree ( map ) ;
2012-05-22 13:55:21 +10:00
kfree ( store - > filemap_attr ) ;
2005-06-21 17:17:14 -07:00
2006-06-26 00:27:48 -07:00
if ( sb_page )
free_buffers ( sb_page ) ;
2005-06-21 17:17:14 -07:00
2006-06-26 00:27:48 -07:00
if ( file ) {
2013-01-23 17:07:38 -05:00
struct inode * inode = file_inode ( file ) ;
2007-02-10 01:45:39 -08:00
invalidate_mapping_pages ( inode - > i_mapping , 0 , - 1 ) ;
2005-06-21 17:17:14 -07:00
fput ( file ) ;
2006-06-26 00:27:48 -07:00
}
2005-06-21 17:17:14 -07:00
}
/*
* md_bitmap_file_kick - if an error occurs while manipulating the bitmap file
* then it is no longer reliable, so we stop using it and we mark the file
* as failed in the superblock
*/
2018-08-01 15:20:50 -07:00
static void md_bitmap_file_kick ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:15 +10:00
if ( ! test_and_set_bit ( BITMAP_STALE , & bitmap - > flags ) ) {
2018-08-01 15:20:50 -07:00
md_bitmap_update_sb ( bitmap ) ;
2005-06-21 17:17:14 -07:00
2012-05-22 13:55:10 +10:00
if ( bitmap - > storage . file ) {
2023-06-15 08:48:32 +02:00
pr_warn ( " %s: kicking failed bitmap file %pD4 from array! \n " ,
bmname ( bitmap ) , bitmap - > storage . file ) ;
2005-06-21 17:17:14 -07:00
2007-07-17 04:06:13 -07:00
} else
2016-11-02 14:16:49 +11:00
pr_warn ( " %s: disabling internal bitmap due to errors \n " ,
bmname ( bitmap ) ) ;
2005-06-21 17:17:27 -07:00
}
2005-06-21 17:17:14 -07:00
}
/* Per-page attribute bits kept in bitmap_storage.filemap_attr. */
enum bitmap_page_attr {
	/* there are set bits that need to be synced */
	BITMAP_PAGE_DIRTY = 0,
	/*
	 * there are bits that are being cleaned,
	 * i.e. counter is 1 or 2
	 */
	BITMAP_PAGE_PENDING = 1,
	/* there are cleared bits that need to be synced */
	BITMAP_PAGE_NEEDWRITE = 2,
};
2012-05-22 13:55:09 +10:00
static inline void set_page_attr ( struct bitmap * bitmap , int pnum ,
enum bitmap_page_attr attr )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:22 +10:00
set_bit ( ( pnum < < 2 ) + attr , bitmap - > storage . filemap_attr ) ;
2005-06-21 17:17:14 -07:00
}
2012-05-22 13:55:09 +10:00
static inline void clear_page_attr ( struct bitmap * bitmap , int pnum ,
enum bitmap_page_attr attr )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:22 +10:00
clear_bit ( ( pnum < < 2 ) + attr , bitmap - > storage . filemap_attr ) ;
2005-06-21 17:17:14 -07:00
}
2012-05-22 13:55:22 +10:00
static inline int test_page_attr ( struct bitmap * bitmap , int pnum ,
enum bitmap_page_attr attr )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:10 +10:00
return test_bit ( ( pnum < < 2 ) + attr , bitmap - > storage . filemap_attr ) ;
2005-06-21 17:17:14 -07:00
}
2012-05-22 13:55:22 +10:00
static inline int test_and_clear_page_attr ( struct bitmap * bitmap , int pnum ,
enum bitmap_page_attr attr )
{
return test_and_clear_bit ( ( pnum < < 2 ) + attr ,
bitmap - > storage . filemap_attr ) ;
}
2005-06-21 17:17:14 -07:00
/*
* md_bitmap_file_set_bit -- called before performing a write to the md device
* to set (and eventually sync) a particular bit in the bitmap file
*
* we set the bit immediately, then we record the page number so that
* when an unplug occurs, we can flush the dirty pages out to disk
*/
2018-08-01 15:20:50 -07:00
/*
 * Set the on-disk-image bit covering sector @block in the in-memory copy
 * of the bitmap and flag the containing page BITMAP_PAGE_DIRTY.
 * Silently does nothing when no page backs that chunk.
 */
static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	unsigned long slot_base = 0;
	unsigned long pg_index;
	unsigned long bit;
	struct page *page;
	void *kaddr;

	page = filemap_get_page(store, chunk);
	if (!page)
		return;

	/* page number including the superblock index of this store */
	pg_index = file_page_index(store, chunk) + store->sb_index;
	if (mddev_is_clustered(bitmap->mddev))
		slot_base = bitmap->cluster_slot * store->file_pages;

	bit = file_page_offset(store, chunk);

	/* set the bit, honouring the bitmap's byte order */
	kaddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		set_bit(bit, kaddr);
	else
		set_bit_le(bit, kaddr);
	kunmap_atomic(kaddr);

	pr_debug("set file bit %lu page %lu\n", bit, pg_index);

	/* record page number so it gets flushed to disk when unplug occurs */
	set_page_attr(bitmap, pg_index - slot_base, BITMAP_PAGE_DIRTY);
}
2018-08-01 15:20:50 -07:00
/*
 * Clear the bit covering sector @block in the in-memory copy of the
 * bitmap.  Unless the page is already flagged BITMAP_PAGE_NEEDWRITE, it
 * is flagged BITMAP_PAGE_PENDING and bitmap->allclean is reset.
 * Silently does nothing when no page backs that chunk.
 */
static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
{
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	unsigned long slot_base = 0;
	unsigned long pg_index;
	unsigned long bit;
	struct page *page;
	void *paddr;

	page = filemap_get_page(store, chunk);
	if (!page)
		return;

	/* page number including the superblock index of this store */
	pg_index = file_page_index(store, chunk) + store->sb_index;
	if (mddev_is_clustered(bitmap->mddev))
		slot_base = bitmap->cluster_slot * store->file_pages;

	bit = file_page_offset(store, chunk);

	/* clear the bit, honouring the bitmap's byte order */
	paddr = kmap_atomic(page);
	if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
		clear_bit(bit, paddr);
	else
		clear_bit_le(bit, paddr);
	kunmap_atomic(paddr);

	if (test_page_attr(bitmap, pg_index - slot_base, BITMAP_PAGE_NEEDWRITE))
		return;

	set_page_attr(bitmap, pg_index - slot_base, BITMAP_PAGE_PENDING);
	bitmap->allclean = 0;
}
2018-08-01 15:20:50 -07:00
/*
 * Test the bit covering sector @block in the in-memory copy of the bitmap.
 *
 * Returns the bit's value, or -EINVAL when no page backs that chunk.
 */
static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
{
	struct bitmap_storage *store = &bitmap->storage;
	unsigned long chunk = block >> bitmap->counts.chunkshift;
	struct page *page = filemap_get_page(store, chunk);
	unsigned long bit;
	void *paddr;
	int set;

	if (!page)
		return -EINVAL;

	bit = file_page_offset(store, chunk);

	paddr = kmap_atomic(page);
	set = test_bit(BITMAP_HOSTENDIAN, &bitmap->flags) ?
		test_bit(bit, paddr) : test_bit_le(bit, paddr);
	kunmap_atomic(paddr);

	return set;
}
2005-06-21 17:17:14 -07:00
/* this gets called when the md device is ready to unplug its underlying
* ( slave ) device queues - - before we let any writes go down , we need to
* sync the dirty pages of the bitmap file to disk */
2018-08-01 15:20:50 -07:00
void md_bitmap_unplug ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
2012-05-22 13:55:19 +10:00
unsigned long i ;
2006-06-26 00:27:45 -07:00
int dirty , need_write ;
2016-11-04 16:46:03 +11:00
int writing = 0 ;
2005-06-21 17:17:14 -07:00
2023-05-29 21:11:03 +08:00
if ( ! md_bitmap_enabled ( bitmap ) )
2007-07-17 04:06:13 -07:00
return ;
2005-06-21 17:17:14 -07:00
/* look at each page to see if there are any set bits that need to be
* flushed out to disk */
2012-05-22 13:55:10 +10:00
for ( i = 0 ; i < bitmap - > storage . file_pages ; i + + ) {
2012-05-22 13:55:22 +10:00
dirty = test_and_clear_page_attr ( bitmap , i , BITMAP_PAGE_DIRTY ) ;
need_write = test_and_clear_page_attr ( bitmap , i ,
BITMAP_PAGE_NEEDWRITE ) ;
if ( dirty | | need_write ) {
2016-11-14 16:30:21 +11:00
if ( ! writing ) {
2018-08-01 15:20:50 -07:00
md_bitmap_wait_writes ( bitmap ) ;
2024-03-03 07:01:41 -07:00
mddev_add_trace_msg ( bitmap - > mddev ,
" md bitmap_unplug " ) ;
2016-11-14 16:30:21 +11:00
}
2012-05-22 13:55:09 +10:00
clear_page_attr ( bitmap , i , BITMAP_PAGE_PENDING ) ;
2023-06-15 08:48:38 +02:00
filemap_write_page ( bitmap , i , false ) ;
2016-11-04 16:46:03 +11:00
writing = 1 ;
2012-05-22 13:55:22 +10:00
}
2005-06-21 17:17:14 -07:00
}
2016-11-04 16:46:03 +11:00
if ( writing )
2018-08-01 15:20:50 -07:00
md_bitmap_wait_writes ( bitmap ) ;
2014-09-09 14:13:51 +10:00
2012-05-22 13:55:15 +10:00
if ( test_bit ( BITMAP_WRITE_ERROR , & bitmap - > flags ) )
2018-08-01 15:20:50 -07:00
md_bitmap_file_kick ( bitmap ) ;
2005-06-21 17:17:14 -07:00
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_unplug ) ;
2005-06-21 17:17:14 -07:00
2023-05-29 21:11:04 +08:00
/*
 * Request block used to run md_bitmap_unplug() from a work item and let
 * the submitter wait for it to finish.
 */
struct bitmap_unplug_work {
struct work_struct work ; /* work item; the handler recovers this struct via container_of() */
struct bitmap * bitmap ; /* bitmap passed to md_bitmap_unplug() by the handler */
struct completion * done ; /* completed by the handler once the unplug has returned */
} ;
static void md_bitmap_unplug_fn ( struct work_struct * work )
{
struct bitmap_unplug_work * unplug_work =
container_of ( work , struct bitmap_unplug_work , work ) ;
md_bitmap_unplug ( unplug_work - > bitmap ) ;
complete ( unplug_work - > done ) ;
}
/*
 * Run md_bitmap_unplug() for @bitmap on md_bitmap_wq and wait until it
 * has finished.
 *
 * The work item and the completion both live on this function's stack,
 * so the function must not return before the handler has signalled
 * completion.
 */
void md_bitmap_unplug_async(struct bitmap *bitmap)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bitmap_unplug_work unplug_work;

	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
	unplug_work.bitmap = bitmap;
	unplug_work.done = &done;

	queue_work(md_bitmap_wq, &unplug_work.work);
	wait_for_completion(&done);

	/*
	 * Pair INIT_WORK_ONSTACK() with destroy_work_on_stack() so the
	 * on-stack work's debug object is released before the stack frame
	 * goes away.
	 */
	destroy_work_on_stack(&unplug_work.work);
}
EXPORT_SYMBOL ( md_bitmap_unplug_async ) ;
2018-08-01 15:20:50 -07:00
/* Forward declaration: md_bitmap_init_from_disk() below calls this before its definition appears. */
static void md_bitmap_set_memory_bits ( struct bitmap * bitmap , sector_t offset , int needed ) ;
2023-06-15 08:48:35 +02:00
/*
* Initialize the in - memory bitmap from the on - disk bitmap and set up the memory
* mapping of the bitmap file .
*
* Special case : If there ' s no bitmap file , or if the bitmap file had been
* previously kicked from the array , we mark all the bits as 1 ' s in order to
* cause a full resync .
2005-09-09 16:23:44 -07:00
*
* We ignore all bits for sectors that end earlier than ' start ' .
2023-06-15 08:48:35 +02:00
* This is used when reading an out - of - date bitmap .
2005-06-21 17:17:14 -07:00
*/
2018-08-01 15:20:50 -07:00
/*
 * Populate the in-memory bitmap from its on-disk image.
 *
 * @bitmap: bitmap to initialise
 * @start:  a chunk is passed to md_bitmap_set_memory_bits() with
 *          needed != 0 only when it ends at or after this sector
 *
 * Without a bitmap file or internal offset there is nothing to read and
 * every chunk is handed to md_bitmap_set_memory_bits().  Otherwise all
 * pages are read in; if the bitmap was stale on entry, every page is
 * filled with ones and written back before the bits are transferred to
 * memory.
 *
 * Returns 0 on success or a negative errno.
 */
static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
{
	bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
	struct bitmap_storage *store = &bitmap->storage;
	struct mddev *mddev = bitmap->mddev;
	struct file *file = store->file;
	unsigned long chunks = bitmap->counts.chunks;
	unsigned long node_offset = 0;
	unsigned long bit_cnt = 0;
	unsigned long i;
	int ret;

	if (!file && !mddev->bitmap_info.offset) {
		/* No permanent bitmap - fill with '1s'. */
		store->filemap = NULL;
		store->file_pages = 0;
		for (i = 0; i < chunks; i++) {
			/* needed only if the chunk ends at or after 'start' */
			int needed = ((sector_t)(i + 1) << (bitmap->counts.chunkshift)
				      >= start);

			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
		}
		return 0;
	}

	if (file && i_size_read(file->f_mapping->host) < store->bytes) {
		pr_warn("%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
			store->bytes);
		ret = -ENOSPC;
		goto err;
	}

	if (mddev_is_clustered(mddev))
		node_offset = bitmap->cluster_slot *
			      (DIV_ROUND_UP(store->bytes, PAGE_SIZE));

	/* Pass 1: read every page of the bitmap into the filemap. */
	for (i = 0; i < store->file_pages; i++) {
		struct page *page = store->filemap[i];
		/* the last page may be only partially used */
		int count = (i == store->file_pages - 1) ?
			    store->bytes - i * PAGE_SIZE : PAGE_SIZE;

		if (file)
			ret = read_file_page(file, i, bitmap, count, page);
		else
			ret = read_sb_page(mddev, 0, page, i + node_offset,
					   count);
		if (ret)
			goto err;
	}

	/* Pass 2 (stale bitmaps only): force all bits on and write back. */
	if (outofdate) {
		pr_warn("%s: bitmap file is out of date, doing full recovery\n",
			bmname(bitmap));

		for (i = 0; i < store->file_pages; i++) {
			struct page *page = store->filemap[i];
			unsigned long offset = 0;
			void *paddr;

			/* leave an internal superblock in page 0 untouched */
			if (i == 0 && !mddev->bitmap_info.external)
				offset = sizeof(bitmap_super_t);

			/*
			 * If the bitmap is out of date, dirty the whole page
			 * and write it out
			 */
			paddr = kmap_atomic(page);
			memset(paddr + offset, 0xff, PAGE_SIZE - offset);
			kunmap_atomic(paddr);

			filemap_write_page(bitmap, i, true);
			if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
				ret = -EIO;
				goto err;
			}
		}
	}

	/* Pass 3: mirror every set disk bit into the in-memory counters. */
	for (i = 0; i < chunks; i++) {
		struct page *page = filemap_get_page(&bitmap->storage, i);
		unsigned long bit = file_page_offset(&bitmap->storage, i);
		void *paddr;
		bool was_set;

		paddr = kmap_atomic(page);
		if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
			was_set = test_bit(bit, paddr);
		else
			was_set = test_bit_le(bit, paddr);
		kunmap_atomic(paddr);

		if (was_set) {
			/* if the disk bit is set, set the memory bit */
			int needed = ((sector_t)(i + 1) << bitmap->counts.chunkshift
				      >= start);

			md_bitmap_set_memory_bits(bitmap,
						  (sector_t)i << bitmap->counts.chunkshift,
						  needed);
			bit_cnt++;
		}
	}

	pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
		 bmname(bitmap), store->file_pages,
		 bit_cnt, chunks);

	return 0;

err:
	pr_warn("%s: bitmap initialisation failed: %d\n",
		bmname(bitmap), ret);
	return ret;
}
2018-08-01 15:20:50 -07:00
void md_bitmap_write_all ( struct bitmap * bitmap )
2005-06-21 17:17:27 -07:00
{
/* We don't actually write all bitmap blocks here,
* just flag them as needing to be written
*/
2006-06-26 00:27:45 -07:00
int i ;
2005-06-21 17:17:27 -07:00
2012-05-22 13:55:10 +10:00
if ( ! bitmap | | ! bitmap - > storage . filemap )
2012-05-22 13:55:08 +10:00
return ;
2012-05-22 13:55:10 +10:00
if ( bitmap - > storage . file )
2012-05-22 13:55:08 +10:00
/* Only one copy, so nothing needed */
return ;
2012-05-22 13:55:10 +10:00
for ( i = 0 ; i < bitmap - > storage . file_pages ; i + + )
2012-05-22 13:55:09 +10:00
set_page_attr ( bitmap , i ,
2006-06-26 00:27:45 -07:00
BITMAP_PAGE_NEEDWRITE ) ;
2011-09-21 15:37:46 +10:00
bitmap - > allclean = 0 ;
2005-06-21 17:17:27 -07:00
}
2018-08-01 15:20:50 -07:00
/*
 * Adjust the in-use count of the counter page covering sector @offset by
 * @inc (which may be negative), then let md_bitmap_checkfree() examine
 * that page.
 */
static void md_bitmap_count_page(struct bitmap_counts *bitmap,
				 sector_t offset, int inc)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;

	bitmap->bp[page].count += inc;
	md_bitmap_checkfree(bitmap, page);
}
2012-05-22 13:55:06 +10:00
2018-08-01 15:20:50 -07:00
/*
 * Set the 'pending' flag on the counter page covering sector @offset.
 * The flag is only written when it is not already set.
 */
static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
{
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	struct bitmap_page *bp = &bitmap->bp[page];

	if (!bp->pending)
		bp->pending = 1;
}
2018-08-01 15:20:50 -07:00
/*
 * Forward declaration: md_bitmap_daemon_work() below calls
 * md_bitmap_get_counter() before the function is defined.
 */
static bitmap_counter_t * md_bitmap_get_counter ( struct bitmap_counts * bitmap ,
sector_t offset , sector_t * blocks ,
int create ) ;
2005-06-21 17:17:14 -07:00
md/bitmap: factor out a helper to set timeout
Registering and unregistering 'mddev->thread' both happen under
'reconfig_mutex'; however, some contexts access mddev->thread without
holding the mutex, which can cause a null-ptr-dereference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, so dereferencing 'mddev->thread' might cause
a null-ptr-dereference, because md_unregister_thread() resets the pointer
before stopping the thread.
2) timeout_store() accesses 'mddev->thread' multiple times, so a
null-ptr-dereference can be triggered if 'mddev->thread' is reset in the
middle.
This patch factors out a helper to set the timeout. The new helper always
checks whether 'mddev->thread' is NULL first, so that problem 1 is fixed.
The helper now accesses 'mddev->thread' only once, but it is still possible
for 'mddev->thread' to be freed while the helper is in progress,
hence the problem is not fully fixed yet. Follow-up patches will fix this by
protecting md_thread with RCU.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
2023-05-23 10:10:16 +08:00
/*
 * mddev_set_timeout - update the timeout of the md thread, if there is one.
 * @mddev:   array whose 'thread' pointer is looked up
 * @timeout: new timeout value to store
 * @force:   when true, always store @timeout; when false, only store it if
 *           the current timeout is below MAX_SCHEDULE_TIMEOUT
 *
 * mddev->thread is read once, under rcu_read_lock(), and a NULL thread is
 * silently ignored.
 */
static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
			      bool force)
{
	struct md_thread *thread;

	rcu_read_lock();
	thread = rcu_dereference(mddev->thread);
	if (!thread)
		goto out;

	if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
		thread->timeout = timeout;

out:
	rcu_read_unlock();
}
2005-06-21 17:17:14 -07:00
/*
* bitmap daemon - - periodically wakes up to clean bits and flush pages
* out to disk
*/
2018-08-01 15:20:50 -07:00
void md_bitmap_daemon_work ( struct mddev * mddev )
2005-06-21 17:17:14 -07:00
{
2009-12-14 12:49:46 +11:00
struct bitmap * bitmap ;
2005-06-21 17:17:22 -07:00
unsigned long j ;
2012-05-22 13:55:06 +10:00
unsigned long nextpage ;
2010-10-19 10:03:39 +11:00
sector_t blocks ;
2012-05-22 13:55:24 +10:00
struct bitmap_counts * counts ;
2005-06-21 17:17:14 -07:00
2009-12-14 12:49:46 +11:00
/* Use a mutex to guard daemon_work against
* bitmap_destroy .
*/
2009-12-14 12:49:52 +11:00
mutex_lock ( & mddev - > bitmap_info . mutex ) ;
2009-12-14 12:49:46 +11:00
bitmap = mddev - > bitmap ;
if ( bitmap = = NULL ) {
2009-12-14 12:49:52 +11:00
mutex_unlock ( & mddev - > bitmap_info . mutex ) ;
2007-07-17 04:06:13 -07:00
return ;
2009-12-14 12:49:46 +11:00
}
2009-12-14 12:49:53 +11:00
if ( time_before ( jiffies , bitmap - > daemon_lastrun
2011-12-23 10:17:50 +11:00
+ mddev - > bitmap_info . daemon_sleep ) )
2008-03-10 11:43:48 -07:00
goto done ;
2005-06-21 17:17:14 -07:00
bitmap - > daemon_lastrun = jiffies ;
2008-03-04 14:29:30 -08:00
if ( bitmap - > allclean ) {
md/bitmap: factor out a helper to set timeout
Register/unregister 'mddev->thread' are both under 'reconfig_mutex',
however, some context didn't hold the mutex to access mddev->thread,
which can cause null-ptr-deference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, deference 'mddev->thread' might cause
null-ptr-deference, because md_unregister_thread() reset the pointer
before stopping the thread.
2) timeout_store() access 'mddev->thread' multiple times,
null-ptr-deference can be triggered if 'mddev->thread' is reset in the
middle.
This patch factor out a helper to set timeout, the new helper always
check if 'mddev->thread' is null first, so that problem 1 can be fixed.
Now that this helper only access 'mddev->thread' once, but it's possible
that 'mddev->thread' can be freed while this helper is still in progress,
hence the problem is not fixed yet. Follow up patches will fix this by
protecting md_thread with rcu.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
2023-05-23 10:10:16 +08:00
mddev_set_timeout ( mddev , MAX_SCHEDULE_TIMEOUT , true ) ;
2009-12-14 12:49:46 +11:00
goto done ;
2008-03-04 14:29:30 -08:00
}
bitmap - > allclean = 1 ;
2005-06-21 17:17:14 -07:00
2024-03-03 07:01:41 -07:00
mddev_add_trace_msg ( bitmap - > mddev , " md bitmap_daemon_work " ) ;
2016-11-14 16:30:21 +11:00
2012-05-22 13:55:06 +10:00
/* Any file-page which is PENDING now needs to be written.
* So set NEEDWRITE now , then after we make any last - minute changes
* we will write it .
*/
2012-05-22 13:55:10 +10:00
for ( j = 0 ; j < bitmap - > storage . file_pages ; j + + )
2012-05-22 13:55:22 +10:00
if ( test_and_clear_page_attr ( bitmap , j ,
BITMAP_PAGE_PENDING ) )
2012-05-22 13:55:09 +10:00
set_page_attr ( bitmap , j ,
2012-05-22 13:55:06 +10:00
BITMAP_PAGE_NEEDWRITE ) ;
if ( bitmap - > need_sync & &
mddev - > bitmap_info . external = = 0 ) {
/* Arrange for superblock update as well as
* other changes */
bitmap_super_t * sb ;
bitmap - > need_sync = 0 ;
2012-05-22 13:55:10 +10:00
if ( bitmap - > storage . filemap ) {
sb = kmap_atomic ( bitmap - > storage . sb_page ) ;
2012-05-22 13:55:08 +10:00
sb - > events_cleared =
cpu_to_le64 ( bitmap - > events_cleared ) ;
kunmap_atomic ( sb ) ;
2012-05-22 13:55:09 +10:00
set_page_attr ( bitmap , 0 ,
2012-05-22 13:55:08 +10:00
BITMAP_PAGE_NEEDWRITE ) ;
}
2012-05-22 13:55:06 +10:00
}
/* Now look at the bitmap counters and if any are '2' or '1',
* decrement and handle accordingly .
*/
2012-05-22 13:55:24 +10:00
counts = & bitmap - > counts ;
spin_lock_irq ( & counts - > lock ) ;
2012-05-22 13:55:06 +10:00
nextpage = 0 ;
2012-05-22 13:55:24 +10:00
for ( j = 0 ; j < counts - > chunks ; j + + ) {
2005-06-21 17:17:14 -07:00
bitmap_counter_t * bmc ;
2012-05-22 13:55:24 +10:00
sector_t block = ( sector_t ) j < < counts - > chunkshift ;
2011-07-27 11:00:37 +10:00
2012-05-22 13:55:06 +10:00
if ( j = = nextpage ) {
nextpage + = PAGE_COUNTER_RATIO ;
2012-05-22 13:55:24 +10:00
if ( ! counts - > bp [ j > > PAGE_COUNTER_SHIFT ] . pending ) {
2012-05-22 13:55:06 +10:00
j | = PAGE_COUNTER_MASK ;
2005-06-21 17:17:22 -07:00
continue ;
}
2012-05-22 13:55:24 +10:00
counts - > bp [ j > > PAGE_COUNTER_SHIFT ] . pending = 0 ;
2005-06-21 17:17:14 -07:00
}
2012-05-22 13:55:06 +10:00
2018-08-01 15:20:50 -07:00
bmc = md_bitmap_get_counter ( counts , block , & blocks , 0 ) ;
2012-05-22 13:55:06 +10:00
if ( ! bmc ) {
2011-09-21 15:37:46 +10:00
j | = PAGE_COUNTER_MASK ;
2012-05-22 13:55:06 +10:00
continue ;
}
if ( * bmc = = 1 & & ! bitmap - > need_sync ) {
/* We can clear the bit */
* bmc = 0 ;
2018-08-01 15:20:50 -07:00
md_bitmap_count_page ( counts , block , - 1 ) ;
md_bitmap_file_clear_bit ( bitmap , block ) ;
2012-05-22 13:55:06 +10:00
} else if ( * bmc & & * bmc < = 2 ) {
* bmc = 1 ;
2018-08-01 15:20:50 -07:00
md_bitmap_set_pending ( counts , block ) ;
2012-05-22 13:55:06 +10:00
bitmap - > allclean = 0 ;
2011-09-21 15:37:46 +10:00
}
2005-06-21 17:17:14 -07:00
}
2012-05-22 13:55:24 +10:00
spin_unlock_irq ( & counts - > lock ) ;
2005-06-21 17:17:14 -07:00
2018-08-01 15:20:50 -07:00
md_bitmap_wait_writes ( bitmap ) ;
2012-05-22 13:55:06 +10:00
/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
* DIRTY pages need to be written by bitmap_unplug so it can wait
* for them .
* If we find any DIRTY page we stop there and let bitmap_unplug
* handle all the rest . This is important in the case where
* the first blocking holds the superblock and it has been updated .
* We mustn ' t write any other blocks before the superblock .
*/
2012-05-22 13:55:21 +10:00
for ( j = 0 ;
j < bitmap - > storage . file_pages
& & ! test_bit ( BITMAP_STALE , & bitmap - > flags ) ;
j + + ) {
2012-05-22 13:55:09 +10:00
if ( test_page_attr ( bitmap , j ,
2012-05-22 13:55:06 +10:00
BITMAP_PAGE_DIRTY ) )
/* bitmap_unplug will handle the rest */
break ;
2019-12-07 11:00:08 +08:00
if ( bitmap - > storage . filemap & &
test_and_clear_page_attr ( bitmap , j ,
2023-06-15 08:48:38 +02:00
BITMAP_PAGE_NEEDWRITE ) )
filemap_write_page ( bitmap , j , false ) ;
2005-06-21 17:17:14 -07:00
}
2008-03-10 11:43:48 -07:00
done :
2008-03-04 14:29:30 -08:00
if ( bitmap - > allclean = = 0 )
md/bitmap: factor out a helper to set timeout
Register/unregister 'mddev->thread' are both under 'reconfig_mutex',
however, some context didn't hold the mutex to access mddev->thread,
which can cause null-ptr-deference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, deference 'mddev->thread' might cause
null-ptr-deference, because md_unregister_thread() reset the pointer
before stopping the thread.
2) timeout_store() access 'mddev->thread' multiple times,
null-ptr-deference can be triggered if 'mddev->thread' is reset in the
middle.
This patch factor out a helper to set timeout, the new helper always
check if 'mddev->thread' is null first, so that problem 1 can be fixed.
Now that this helper only access 'mddev->thread' once, but it's possible
that 'mddev->thread' can be freed while this helper is still in progress,
hence the problem is not fixed yet. Follow up patches will fix this by
protecting md_thread with rcu.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
2023-05-23 10:10:16 +08:00
mddev_set_timeout ( mddev , mddev - > bitmap_info . daemon_sleep , true ) ;
2009-12-14 12:49:52 +11:00
mutex_unlock ( & mddev - > bitmap_info . mutex ) ;
2005-06-21 17:17:14 -07:00
}
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_get_counter - locate the in-memory counter for sector @offset.
 * @bitmap: counter set to look in
 * @offset: sector whose chunk counter is wanted
 * @blocks: out; number of sectors from @offset to the end of the region
 *          covered by the returned counter (one chunk, or a whole counter
 *          page's worth when the page is hijacked or has no map)
 * @create: passed through to md_bitmap_checkpage()
 *
 * Returns a pointer to the counter, or NULL when the counter page index is
 * beyond bitmap->pages or md_bitmap_checkpage() reports an error.  *@blocks
 * is filled in on every path, including the NULL ones.
 */
static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
					       sector_t offset, sector_t *blocks,
					       int create)
__releases(bitmap->lock)
__acquires(bitmap->lock)
{
	/* If 'create', we might release the lock and reclaim it.
	 * The lock must have been taken with interrupts enabled.
	 * If !create, we don't release the lock.
	 */
	sector_t chunk = offset >> bitmap->chunkshift;
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize = ((sector_t)1) << bitmap->chunkshift;
	int err;

	if (page >= bitmap->pages) {
		/*
		 * This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page or
		 * user set a huge number to sysfs bitmap_set_bits.
		 */
		*blocks = csize - (offset & (csize - 1));
		return NULL;
	}
	err = md_bitmap_checkpage(bitmap, page, create, 0);

	if (bitmap->bp[page].hijacked ||
	    bitmap->bp[page].map == NULL)
		csize = ((sector_t)1) << (bitmap->chunkshift +
					  PAGE_COUNTER_SHIFT);

	*blocks = csize - (offset & (csize - 1));

	if (err < 0)
		return NULL;

	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);

		return &((bitmap_counter_t *)
			 &bitmap->bp[page].map)[hi];
	} else /* page is allocated */
		return (bitmap_counter_t *)
			&(bitmap->bp[page].map[pageoff]);
}
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_startwrite - account for a write that is about to start.
 * @bitmap:  bitmap to update; NULL is accepted and ignored
 * @offset:  first sector of the write
 * @sectors: length of the write in sectors
 * @behind:  non-zero if this is a write-behind request
 *
 * Walks the range counter by counter.  A counter at 0 gets its on-disk bit
 * set and its page count raised; counters at 0 or 1 are moved to 2 before
 * being incremented.  If a counter has reached COUNTER_MAX the caller sleeps
 * on overflow_wait and retries.  Stops early if no counter can be obtained.
 * Always returns 0.
 */
int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
	if (!bitmap)
		return 0;

	if (behind) {
		int bw;

		atomic_inc(&bitmap->behind_writes);
		bw = atomic_read(&bitmap->behind_writes);
		/* Track the high-water mark of concurrent behind writes. */
		if (bw > bitmap->behind_writes_used)
			bitmap->behind_writes_used = bw;

		pr_debug("inc write-behind count %d/%lu\n",
			 bw, bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		bitmap_counter_t *bmc;

		spin_lock_irq(&bitmap->counts.lock);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->counts.lock);
			return 0;
		}

		if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
			DEFINE_WAIT(__wait);
			/* note that it is safe to do the prepare_to_wait
			 * after the test as long as we do it before dropping
			 * the spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&bitmap->counts.lock);
			schedule();
			finish_wait(&bitmap->overflow_wait, &__wait);
			continue;
		}

		switch (*bmc) {
		case 0:
			md_bitmap_file_set_bit(bitmap, offset);
			md_bitmap_count_page(&bitmap->counts, offset, 1);
			fallthrough;
		case 1:
			*bmc = 2;
		}

		(*bmc)++;

		spin_unlock_irq(&bitmap->counts.lock);

		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
	return 0;
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_startwrite ) ;
2005-06-21 17:17:14 -07:00
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_endwrite - account for a write that has finished.
 * @bitmap:  bitmap to update; NULL is accepted and ignored
 * @offset:  first sector of the write
 * @sectors: length of the write in sectors
 * @success: non-zero if the write succeeded
 * @behind:  non-zero if this was a write-behind request
 *
 * Walks the range counter by counter and decrements each counter.  A failed
 * write sets NEEDED_MASK on the counter; a counter that was at COUNTER_MAX
 * wakes overflow_wait; a counter that ends at 2 or below marks its page
 * pending.  Stops early if no counter can be obtained.
 */
void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
			unsigned long sectors, int success, int behind)
{
	if (!bitmap)
		return;
	if (behind) {
		if (atomic_dec_and_test(&bitmap->behind_writes))
			wake_up(&bitmap->behind_wait);
		pr_debug("dec write-behind count %d/%lu\n",
			 atomic_read(&bitmap->behind_writes),
			 bitmap->mddev->bitmap_info.max_write_behind);
	}

	while (sectors) {
		sector_t blocks;
		unsigned long flags;
		bitmap_counter_t *bmc;

		spin_lock_irqsave(&bitmap->counts.lock, flags);
		bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->counts.lock, flags);
			return;
		}

		if (success && !bitmap->mddev->degraded &&
		    bitmap->events_cleared < bitmap->mddev->events) {
			bitmap->events_cleared = bitmap->mddev->events;
			bitmap->need_sync = 1;
			sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
		}

		if (!success && !NEEDED(*bmc))
			*bmc |= NEEDED_MASK;

		/* A writer may be sleeping in md_bitmap_startwrite(). */
		if (COUNTER(*bmc) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			md_bitmap_set_pending(&bitmap->counts, offset);
			bitmap->allclean = 0;
		}
		spin_unlock_irqrestore(&bitmap->counts.lock, flags);
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_endwrite ) ;
2005-06-21 17:17:14 -07:00
2010-10-19 10:03:39 +11:00
/*
 * __bitmap_start_sync - check whether the region at @offset needs resync.
 * @bitmap:   bitmap to consult; with a NULL bitmap *@blocks is set to 1024
 *            and 1 is returned
 * @offset:   sector to look up
 * @blocks:   out; filled in by md_bitmap_get_counter()
 * @degraded: when non-zero the counter flags are left untouched
 *
 * Returns 1 if the counter has RESYNC or NEEDED set, 0 otherwise (including
 * when no counter is found).  When NEEDED is set and @degraded is zero, the
 * counter is switched from NEEDED to RESYNC.
 */
static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			       int degraded)
{
	bitmap_counter_t *bmc;
	int rv;

	if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}
	spin_lock_irq(&bitmap->counts.lock);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	rv = 0;
	if (bmc) {
		/* locked */
		if (RESYNC(*bmc))
			rv = 1;
		else if (NEEDED(*bmc)) {
			rv = 1;
			if (!degraded) { /* don't set/clear bits if degraded */
				*bmc |= RESYNC_MASK;
				*bmc &= ~NEEDED_MASK;
			}
		}
	}
	spin_unlock_irq(&bitmap->counts.lock);
	return rv;
}
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_start_sync - page-granular wrapper around __bitmap_start_sync().
 * @bitmap:   bitmap to consult (may be NULL)
 * @offset:   first sector to look up
 * @blocks:   out; total number of sectors covered by the calls made
 * @degraded: passed through to __bitmap_start_sync()
 *
 * Returns the bitwise OR of the results of every __bitmap_start_sync() call.
 */
int md_bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			 int degraded)
{
	/* bitmap_start_sync must always report on multiples of whole
	 * pages, otherwise resync (which is very PAGE_SIZE based) will
	 * get confused.
	 * So call __bitmap_start_sync repeatedly (if needed) until
	 * At least PAGE_SIZE>>9 blocks are covered.
	 * Return the 'or' of the result.
	 */
	int rv = 0;
	sector_t blocks1;

	*blocks = 0;
	while (*blocks < (PAGE_SIZE >> 9)) {
		rv |= __bitmap_start_sync(bitmap, offset,
					  &blocks1, degraded);
		offset += blocks1;
		*blocks += blocks1;
	}
	return rv;
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_start_sync ) ;
2009-03-31 14:27:02 +11:00
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_end_sync - clear the RESYNC flag on the counter at @offset.
 * @bitmap:  bitmap to update; with a NULL bitmap *@blocks is set to 1024
 * @offset:  sector to look up
 * @blocks:  out; filled in by md_bitmap_get_counter()
 * @aborted: non-zero if the sync of this region was aborted
 *
 * If RESYNC was set it is cleared.  Then, if the sync was aborted and
 * NEEDED is not set, NEEDED is set again; otherwise a counter value of 2 or
 * below marks the page pending so the daemon can age it.
 */
void md_bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;

	if (bitmap == NULL) {
		*blocks = 1024;
		return;
	}
	spin_lock_irqsave(&bitmap->counts.lock, flags);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
	if (bmc == NULL)
		goto unlock;
	/* locked */
	if (RESYNC(*bmc)) {
		*bmc &= ~RESYNC_MASK;

		if (!NEEDED(*bmc) && aborted)
			*bmc |= NEEDED_MASK;
		else {
			if (*bmc <= 2) {
				md_bitmap_set_pending(&bitmap->counts, offset);
				bitmap->allclean = 0;
			}
		}
	}
 unlock:
	spin_unlock_irqrestore(&bitmap->counts.lock, flags);
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_end_sync ) ;
2005-06-21 17:17:14 -07:00
2018-08-01 15:20:50 -07:00
void md_bitmap_close_sync ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
/* Sync has finished, and any bitmap chunks that weren't synced
* properly have been aborted . It remains to us to clear the
* RESYNC bit wherever it is still on
*/
sector_t sector = 0 ;
2010-10-19 10:03:39 +11:00
sector_t blocks ;
2008-02-06 01:39:50 -08:00
if ( ! bitmap )
return ;
2005-06-21 17:17:14 -07:00
while ( sector < bitmap - > mddev - > resync_max_sectors ) {
2018-08-01 15:20:50 -07:00
md_bitmap_end_sync ( bitmap , sector , & blocks , 0 ) ;
2008-02-06 01:39:50 -08:00
sector + = blocks ;
}
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_close_sync ) ;
2008-02-06 01:39:50 -08:00
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_cond_end_sync - periodically end-sync everything below @sector.
 * @bitmap: bitmap to update; NULL is accepted and ignored
 * @sector: current resync position; 0 only resets the rate-limit timestamp
 * @force:  when true, skip the daemon_sleep rate limit
 *
 * Unless forced, does nothing if less than bitmap_info.daemon_sleep jiffies
 * have passed since the last run.  Otherwise waits for recovery_active to
 * drop to zero, records @sector as curr_resync_completed, flags the
 * superblock with MD_SB_CHANGE_CLEAN, and calls md_bitmap_end_sync() for
 * each region below @sector rounded down to a chunk boundary.
 */
void md_bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
{
	sector_t s = 0;
	sector_t blocks;

	if (!bitmap)
		return;
	if (sector == 0) {
		bitmap->last_end_sync = jiffies;
		return;
	}
	if (!force && time_before(jiffies, (bitmap->last_end_sync
				  + bitmap->mddev->bitmap_info.daemon_sleep)))
		return;
	wait_event(bitmap->mddev->recovery_wait,
		   atomic_read(&bitmap->mddev->recovery_active) == 0);

	bitmap->mddev->curr_resync_completed = sector;
	set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
	/* Round down to the start of the chunk containing 'sector'. */
	sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
	s = 0;
	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
		md_bitmap_end_sync(bitmap, s, &blocks, 0);
		s += blocks;
	}
	bitmap->last_end_sync = jiffies;
	sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed);
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_cond_end_sync ) ;
2005-06-21 17:17:14 -07:00
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_sync_with_cluster - follow a resync window that has moved.
 * @mddev:  array whose bitmap is updated
 * @old_lo: previous low edge of the window
 * @old_hi: previous high edge of the window
 * @new_lo: new low edge of the window
 * @new_hi: new high edge of the window
 *
 * Calls md_bitmap_end_sync() over [@old_lo, @new_lo) and
 * md_bitmap_start_sync() over [@old_hi, @new_hi), and warns if the last
 * step size reported is larger than the new edge while the old edge is
 * non-zero.
 */
void md_bitmap_sync_with_cluster(struct mddev *mddev,
			      sector_t old_lo, sector_t old_hi,
			      sector_t new_lo, sector_t new_hi)
{
	struct bitmap *bitmap = mddev->bitmap;
	sector_t sector, blocks = 0;

	for (sector = old_lo; sector < new_lo; ) {
		md_bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n");

	for (sector = old_hi; sector < new_hi; ) {
		md_bitmap_start_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
	WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n");
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_sync_with_cluster ) ;
2016-05-02 11:50:12 -04:00
2018-08-01 15:20:50 -07:00
/*
 * md_bitmap_set_memory_bits - mark the chunk at @offset dirty in memory.
 * @bitmap: bitmap to update
 * @offset: sector inside the chunk to mark
 * @needed: non-zero to also set NEEDED_MASK on the counter
 *
 * Does nothing if no counter can be obtained for @offset.
 */
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	/* For each chunk covered by any of these sectors, set the
	 * counter to 2 and possibly set resync_needed.  They should all
	 * be 0 at this point
	 */

	sector_t secs;
	bitmap_counter_t *bmc;

	spin_lock_irq(&bitmap->counts.lock);
	bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
	if (!bmc) {
		spin_unlock_irq(&bitmap->counts.lock);
		return;
	}
	if (!*bmc) {
		*bmc = 2;
		md_bitmap_count_page(&bitmap->counts, offset, 1);
		md_bitmap_set_pending(&bitmap->counts, offset);
		bitmap->allclean = 0;
	}
	if (needed)
		*bmc |= NEEDED_MASK;
	spin_unlock_irq(&bitmap->counts.lock);
}
2006-10-03 01:15:49 -07:00
/* dirty the memory and file bits for bitmap chunks "s" to "e" */
2018-08-01 15:20:50 -07:00
void md_bitmap_dirty_bits ( struct bitmap * bitmap , unsigned long s , unsigned long e )
2006-10-03 01:15:49 -07:00
{
unsigned long chunk ;
for ( chunk = s ; chunk < = e ; chunk + + ) {
2012-05-22 13:55:24 +10:00
sector_t sec = ( sector_t ) chunk < < bitmap - > counts . chunkshift ;
2018-08-01 15:20:50 -07:00
md_bitmap_set_memory_bits ( bitmap , sec , 1 ) ;
md_bitmap_file_set_bit ( bitmap , sec ) ;
2009-12-14 12:49:56 +11:00
if ( sec < bitmap - > mddev - > recovery_cp )
/* We are asserting that the array is dirty,
* so move the recovery_cp address back so
* that it is obvious that it is dirty
*/
bitmap - > mddev - > recovery_cp = sec ;
2006-10-03 01:15:49 -07:00
}
}
2005-08-04 12:53:35 -07:00
/*
* flush out any pending updates
*/
2018-08-01 15:20:50 -07:00
void md_bitmap_flush ( struct mddev * mddev )
2005-08-04 12:53:35 -07:00
{
struct bitmap * bitmap = mddev - > bitmap ;
2009-12-14 12:49:53 +11:00
long sleep ;
2005-08-04 12:53:35 -07:00
if ( ! bitmap ) /* there was no bitmap */
return ;
/* run the daemon_work three time to ensure everything is flushed
* that can be
*/
2009-12-14 12:49:53 +11:00
sleep = mddev - > bitmap_info . daemon_sleep * 2 ;
2009-12-14 12:49:53 +11:00
bitmap - > daemon_lastrun - = sleep ;
2018-08-01 15:20:50 -07:00
md_bitmap_daemon_work ( mddev ) ;
2009-12-14 12:49:53 +11:00
bitmap - > daemon_lastrun - = sleep ;
2018-08-01 15:20:50 -07:00
md_bitmap_daemon_work ( mddev ) ;
2009-12-14 12:49:53 +11:00
bitmap - > daemon_lastrun - = sleep ;
2018-08-01 15:20:50 -07:00
md_bitmap_daemon_work ( mddev ) ;
2021-04-13 04:08:29 +00:00
if ( mddev - > bitmap_info . external )
md_super_wait ( mddev ) ;
2018-08-01 15:20:50 -07:00
md_bitmap_update_sb ( bitmap ) ;
2005-08-04 12:53:35 -07:00
}
2005-06-21 17:17:14 -07:00
/*
* free memory that was allocated
*/
2018-08-01 15:20:50 -07:00
void md_bitmap_free ( struct bitmap * bitmap )
2005-06-21 17:17:14 -07:00
{
unsigned long k , pages ;
struct bitmap_page * bp ;
if ( ! bitmap ) /* there was no bitmap */
return ;
2016-04-01 17:08:49 +08:00
if ( bitmap - > sysfs_can_clear )
sysfs_put ( bitmap - > sysfs_can_clear ) ;
2014-06-06 12:43:49 -05:00
if ( mddev_is_clustered ( bitmap - > mddev ) & & bitmap - > mddev - > cluster_info & &
bitmap - > cluster_slot = = md_cluster_ops - > slot_number ( bitmap - > mddev ) )
2014-06-06 11:50:56 -05:00
md_cluster_stop ( bitmap - > mddev ) ;
2012-05-22 13:55:21 +10:00
/* Shouldn't be needed - but just in case.... */
wait_event ( bitmap - > write_wait ,
atomic_read ( & bitmap - > pending_writes ) = = 0 ) ;
/* release the bitmap file */
2018-08-01 15:20:50 -07:00
md_bitmap_file_unmap ( & bitmap - > storage ) ;
2005-06-21 17:17:14 -07:00
2012-05-22 13:55:24 +10:00
bp = bitmap - > counts . bp ;
pages = bitmap - > counts . pages ;
2005-06-21 17:17:14 -07:00
/* free all allocated memory */
if ( bp ) /* deallocate the page memory */
for ( k = 0 ; k < pages ; k + + )
if ( bp [ k ] . map & & ! bp [ k ] . hijacked )
kfree ( bp [ k ] . map ) ;
kfree ( bp ) ;
kfree ( bitmap ) ;
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL ( md_bitmap_free ) ;
2009-12-14 12:49:46 +11:00
2018-08-01 15:20:50 -07:00
void md_bitmap_wait_behind_writes ( struct mddev * mddev )
2017-03-14 09:40:20 +08:00
{
struct bitmap * bitmap = mddev - > bitmap ;
/* wait for behind writes to complete */
if ( bitmap & & atomic_read ( & bitmap - > behind_writes ) > 0 ) {
pr_debug ( " md:%s: behind writes in progress - waiting to stop. \n " ,
mdname ( mddev ) ) ;
/* need to kick something here to make sure I/O goes? */
wait_event ( bitmap - > behind_wait ,
atomic_read ( & bitmap - > behind_writes ) = = 0 ) ;
}
}
2018-08-01 15:20:50 -07:00
void md_bitmap_destroy ( struct mddev * mddev )
2005-09-09 16:23:50 -07:00
{
struct bitmap * bitmap = mddev - > bitmap ;
if ( ! bitmap ) /* there was no bitmap */
return ;
2018-08-01 15:20:50 -07:00
md_bitmap_wait_behind_writes ( mddev ) ;
2019-12-23 10:49:00 +01:00
if ( ! mddev - > serialize_policy )
2023-10-10 23:19:53 +08:00
mddev_destroy_serial_pool ( mddev , NULL ) ;
2017-03-14 09:40:20 +08:00
2009-12-14 12:49:52 +11:00
mutex_lock ( & mddev - > bitmap_info . mutex ) ;
2014-12-15 12:56:58 +11:00
spin_lock ( & mddev - > lock ) ;
2005-09-09 16:23:50 -07:00
mddev - > bitmap = NULL ; /* disconnect from the md device */
2014-12-15 12:56:58 +11:00
spin_unlock ( & mddev - > lock ) ;
2009-12-14 12:49:52 +11:00
mutex_unlock ( & mddev - > bitmap_info . mutex ) ;
md/bitmap: factor out a helper to set timeout
Register/unregister 'mddev->thread' are both under 'reconfig_mutex',
however, some context didn't hold the mutex to access mddev->thread,
which can cause null-ptr-deference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, deference 'mddev->thread' might cause
null-ptr-deference, because md_unregister_thread() reset the pointer
before stopping the thread.
2) timeout_store() access 'mddev->thread' multiple times,
null-ptr-deference can be triggered if 'mddev->thread' is reset in the
middle.
This patch factor out a helper to set timeout, the new helper always
check if 'mddev->thread' is null first, so that problem 1 can be fixed.
Now that this helper only access 'mddev->thread' once, but it's possible
that 'mddev->thread' can be freed while this helper is still in progress,
hence the problem is not fixed yet. Follow up patches will fix this by
protecting md_thread with rcu.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
2023-05-23 10:10:16 +08:00
mddev_set_timeout ( mddev , MAX_SCHEDULE_TIMEOUT , true ) ;
2005-09-09 16:23:50 -07:00
2018-08-01 15:20:50 -07:00
md_bitmap_free ( bitmap ) ;
2005-09-09 16:23:50 -07:00
}
2005-06-21 17:17:14 -07:00
/*
* initialize the bitmap structure
* if this returns an error , bitmap_destroy must be called to do clean up
2016-04-01 17:08:49 +08:00
* once mddev - > bitmap is set
2005-06-21 17:17:14 -07:00
*/
2018-08-01 15:20:50 -07:00
struct bitmap * md_bitmap_create ( struct mddev * mddev , int slot )
2005-06-21 17:17:14 -07:00
{
struct bitmap * bitmap ;
2009-04-20 11:50:24 +10:00
sector_t blocks = mddev - > resync_max_sectors ;
2009-12-14 12:49:52 +11:00
struct file * file = mddev - > bitmap_info . file ;
2005-06-21 17:17:14 -07:00
int err ;
2013-12-11 14:11:53 -05:00
struct kernfs_node * bm = NULL ;
2005-06-21 17:17:14 -07:00
2006-10-11 01:22:26 -07:00
BUILD_BUG_ON ( sizeof ( bitmap_super_t ) ! = 256 ) ;
2005-06-21 17:17:14 -07:00
2009-12-14 12:49:52 +11:00
BUG_ON ( file & & mddev - > bitmap_info . offset ) ;
2005-06-21 17:17:27 -07:00
2017-10-17 14:24:09 +11:00
if ( test_bit ( MD_HAS_JOURNAL , & mddev - > flags ) ) {
pr_notice ( " md/raid:%s: array with journal cannot have bitmap \n " ,
mdname ( mddev ) ) ;
return ERR_PTR ( - EBUSY ) ;
}
2006-01-06 00:20:32 -08:00
bitmap = kzalloc ( sizeof ( * bitmap ) , GFP_KERNEL ) ;
2005-06-21 17:17:14 -07:00
if ( ! bitmap )
2014-06-06 12:43:49 -05:00
return ERR_PTR ( - ENOMEM ) ;
2005-06-21 17:17:14 -07:00
2012-05-22 13:55:24 +10:00
spin_lock_init ( & bitmap - > counts . lock ) ;
2006-06-26 00:27:49 -07:00
atomic_set ( & bitmap - > pending_writes , 0 ) ;
init_waitqueue_head ( & bitmap - > write_wait ) ;
2007-02-08 14:20:37 -08:00
init_waitqueue_head ( & bitmap - > overflow_wait ) ;
2010-03-31 11:21:44 +11:00
init_waitqueue_head ( & bitmap - > behind_wait ) ;
2006-06-26 00:27:49 -07:00
2005-06-21 17:17:14 -07:00
bitmap - > mddev = mddev ;
2014-06-06 12:43:49 -05:00
bitmap - > cluster_slot = slot ;
2005-06-21 17:17:14 -07:00
2010-06-01 19:37:32 +10:00
if ( mddev - > kobj . sd )
2013-09-11 23:19:13 -04:00
bm = sysfs_get_dirent ( mddev - > kobj . sd , " bitmap " ) ;
2009-12-14 12:49:56 +11:00
if ( bm ) {
2013-09-11 23:19:13 -04:00
bitmap - > sysfs_can_clear = sysfs_get_dirent ( bm , " can_clear " ) ;
2009-12-14 12:49:56 +11:00
sysfs_put ( bm ) ;
} else
bitmap - > sysfs_can_clear = NULL ;
2012-05-22 13:55:10 +10:00
bitmap - > storage . file = file ;
2006-06-26 00:27:49 -07:00
if ( file ) {
get_file ( file ) ;
2009-10-16 15:56:01 +11:00
/* As future accesses to this file will use bmap,
* and bypass the page cache , we must sync the file
* first .
*/
2010-03-22 17:32:25 +01:00
vfs_fsync ( file , 1 ) ;
2006-06-26 00:27:49 -07:00
}
2009-12-14 12:49:53 +11:00
/* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
2011-06-08 17:59:30 -05:00
if ( ! mddev - > bitmap_info . external ) {
/*
* If ' MD_ARRAY_FIRST_USE ' is set , then device - mapper is
* instructing us to create a new on - disk bitmap instance .
*/
if ( test_and_clear_bit ( MD_ARRAY_FIRST_USE , & mddev - > flags ) )
2018-08-01 15:20:50 -07:00
err = md_bitmap_new_disk_sb ( bitmap ) ;
2011-06-08 17:59:30 -05:00
else
2018-08-01 15:20:50 -07:00
err = md_bitmap_read_sb ( bitmap ) ;
2011-06-08 17:59:30 -05:00
} else {
2009-12-14 12:49:56 +11:00
err = 0 ;
if ( mddev - > bitmap_info . chunksize = = 0 | |
mddev - > bitmap_info . daemon_sleep = = 0 )
/* chunksize and time_base need to be
* set first . */
err = - EINVAL ;
}
2005-06-21 17:17:14 -07:00
if ( err )
2005-09-09 16:23:50 -07:00
goto error ;
2005-06-21 17:17:14 -07:00
2009-12-14 12:49:56 +11:00
bitmap - > daemon_lastrun = jiffies ;
2018-08-01 15:20:50 -07:00
err = md_bitmap_resize ( bitmap , blocks , mddev - > bitmap_info . chunksize , 1 ) ;
2012-05-22 13:55:25 +10:00
if ( err )
2005-09-09 16:23:50 -07:00
goto error ;
2005-06-21 17:17:14 -07:00
2016-11-02 14:16:49 +11:00
pr_debug ( " created bitmap (%lu pages) for device %s \n " ,
bitmap - > counts . pages , bmname ( bitmap ) ) ;
2010-06-01 19:37:35 +10:00
2014-06-06 12:43:49 -05:00
err = test_bit ( BITMAP_WRITE_ERROR , & bitmap - > flags ) ? - EIO : 0 ;
if ( err )
goto error ;
2010-06-01 19:37:35 +10:00
2014-06-06 12:43:49 -05:00
return bitmap ;
2010-06-01 19:37:35 +10:00
error :
2018-08-01 15:20:50 -07:00
md_bitmap_free ( bitmap ) ;
2014-06-06 12:43:49 -05:00
return ERR_PTR ( err ) ;
2010-06-01 19:37:35 +10:00
}
2018-08-01 15:20:50 -07:00
int md_bitmap_load ( struct mddev * mddev )
2010-06-01 19:37:35 +10:00
{
int err = 0 ;
2011-07-27 11:00:37 +10:00
sector_t start = 0 ;
2010-06-01 19:37:35 +10:00
sector_t sector = 0 ;
struct bitmap * bitmap = mddev - > bitmap ;
2019-06-14 17:10:38 +08:00
struct md_rdev * rdev ;
2010-06-01 19:37:35 +10:00
if ( ! bitmap )
goto out ;
2019-06-14 17:10:38 +08:00
rdev_for_each ( rdev , mddev )
2023-10-10 23:19:53 +08:00
mddev_create_serial_pool ( mddev , rdev ) ;
2019-06-14 17:10:38 +08:00
2016-05-04 02:17:09 -04:00
if ( mddev_is_clustered ( mddev ) )
md_cluster_ops - > load_bitmaps ( mddev , mddev - > bitmap_info . nodes ) ;
2010-06-01 19:37:35 +10:00
/* Clear out old bitmap info first: Either there is none, or we
* are resuming after someone else has possibly changed things ,
* so we should forget old cached info .
* All chunks should be clean , but some might need_sync .
*/
while ( sector < mddev - > resync_max_sectors ) {
2010-10-19 10:03:39 +11:00
sector_t blocks ;
2018-08-01 15:20:50 -07:00
md_bitmap_start_sync ( bitmap , sector , & blocks , 0 ) ;
2010-06-01 19:37:35 +10:00
sector + = blocks ;
}
2018-08-01 15:20:50 -07:00
md_bitmap_close_sync ( bitmap ) ;
2010-06-01 19:37:35 +10:00
2011-07-27 11:00:37 +10:00
if ( mddev - > degraded = = 0
| | bitmap - > events_cleared = = mddev - > events )
/* no need to keep dirty bits to optimise a
* re - add of a missing device */
start = mddev - > recovery_cp ;
2012-04-12 16:05:06 +10:00
mutex_lock ( & mddev - > bitmap_info . mutex ) ;
2018-08-01 15:20:50 -07:00
err = md_bitmap_init_from_disk ( bitmap , start ) ;
2012-04-12 16:05:06 +10:00
mutex_unlock ( & mddev - > bitmap_info . mutex ) ;
2011-07-27 11:00:37 +10:00
2005-06-21 17:17:14 -07:00
if ( err )
2010-06-01 19:37:35 +10:00
goto out ;
2012-05-22 13:55:15 +10:00
clear_bit ( BITMAP_STALE , & bitmap - > flags ) ;
2012-05-22 13:55:08 +10:00
/* Kick recovery in case any bits were set */
set_bit ( MD_RECOVERY_NEEDED , & bitmap - > mddev - > recovery ) ;
2005-09-09 16:23:50 -07:00
md/bitmap: factor out a helper to set timeout
Register/unregister 'mddev->thread' are both under 'reconfig_mutex',
however, some context didn't hold the mutex to access mddev->thread,
which can cause null-ptr-deference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, deference 'mddev->thread' might cause
null-ptr-deference, because md_unregister_thread() reset the pointer
before stopping the thread.
2) timeout_store() access 'mddev->thread' multiple times,
null-ptr-deference can be triggered if 'mddev->thread' is reset in the
middle.
This patch factor out a helper to set timeout, the new helper always
check if 'mddev->thread' is null first, so that problem 1 can be fixed.
Now that this helper only access 'mddev->thread' once, but it's possible
that 'mddev->thread' can be freed while this helper is still in progress,
hence the problem is not fixed yet. Follow up patches will fix this by
protecting md_thread with rcu.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
2023-05-23 10:10:16 +08:00
mddev_set_timeout ( mddev , mddev - > bitmap_info . daemon_sleep , true ) ;
2009-12-14 12:49:54 +11:00
md_wakeup_thread ( mddev - > thread ) ;
2006-01-06 00:20:16 -08:00
2018-08-01 15:20:50 -07:00
md_bitmap_update_sb ( bitmap ) ;
2007-07-17 04:06:13 -07:00
2012-05-22 13:55:15 +10:00
if ( test_bit ( BITMAP_WRITE_ERROR , & bitmap - > flags ) )
2010-06-01 19:37:35 +10:00
err = - EIO ;
out :
2005-09-09 16:23:50 -07:00
return err ;
2005-06-21 17:17:14 -07:00
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL_GPL ( md_bitmap_load ) ;
2005-06-21 17:17:14 -07:00
2020-09-27 13:40:13 +08:00
/*
 * Create a bitmap for cluster slot @slot and populate it from disk.
 *
 * Returns the bitmap, or an ERR_PTR() if creation or loading fails.
 * The caller needs to free the returned bitmap with md_bitmap_free().
 */
struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot)
{
	struct bitmap *bitmap = md_bitmap_create(mddev, slot);
	int err;

	/* creation failed: hand the error pointer straight back */
	if (IS_ERR(bitmap))
		return bitmap;

	err = md_bitmap_init_from_disk(bitmap, 0);
	if (!err)
		return bitmap;

	md_bitmap_free(bitmap);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(get_bitmap_from_slot);
2014-06-07 00:36:26 -05:00
/* Loads the bitmap associated with slot and copies the resync information
* to our bitmap
*/
2018-08-01 15:20:50 -07:00
int md_bitmap_copy_from_slot ( struct mddev * mddev , int slot ,
2015-04-14 10:45:42 -05:00
sector_t * low , sector_t * high , bool clear_bits )
2014-06-07 00:36:26 -05:00
{
int rv = 0 , i , j ;
sector_t block , lo = 0 , hi = 0 ;
struct bitmap_counts * counts ;
2017-03-01 16:42:39 +08:00
struct bitmap * bitmap ;
2014-06-07 00:36:26 -05:00
2017-03-01 16:42:39 +08:00
bitmap = get_bitmap_from_slot ( mddev , slot ) ;
if ( IS_ERR ( bitmap ) ) {
pr_err ( " %s can't get bitmap from slot %d \n " , __func__ , slot ) ;
return - 1 ;
}
2014-06-07 00:36:26 -05:00
counts = & bitmap - > counts ;
for ( j = 0 ; j < counts - > chunks ; j + + ) {
block = ( sector_t ) j < < counts - > chunkshift ;
2018-08-01 15:20:50 -07:00
if ( md_bitmap_file_test_bit ( bitmap , block ) ) {
2014-06-07 00:36:26 -05:00
if ( ! lo )
lo = block ;
hi = block ;
2018-08-01 15:20:50 -07:00
md_bitmap_file_clear_bit ( bitmap , block ) ;
md_bitmap_set_memory_bits ( mddev - > bitmap , block , 1 ) ;
md_bitmap_file_set_bit ( mddev - > bitmap , block ) ;
2014-06-07 00:36:26 -05:00
}
}
2015-04-14 10:45:42 -05:00
if ( clear_bits ) {
2018-08-01 15:20:50 -07:00
md_bitmap_update_sb ( bitmap ) ;
2016-05-02 11:50:15 -04:00
/* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs
* BITMAP_PAGE_DIRTY or _NEEDWRITE to write . . . */
2015-04-14 10:45:42 -05:00
for ( i = 0 ; i < bitmap - > storage . file_pages ; i + + )
2016-05-02 11:50:15 -04:00
if ( test_page_attr ( bitmap , i , BITMAP_PAGE_PENDING ) )
set_page_attr ( bitmap , i , BITMAP_PAGE_NEEDWRITE ) ;
2018-08-01 15:20:50 -07:00
md_bitmap_unplug ( bitmap ) ;
2015-04-14 10:45:42 -05:00
}
2018-08-01 15:20:50 -07:00
md_bitmap_unplug ( mddev - > bitmap ) ;
2014-06-07 00:36:26 -05:00
* low = lo ;
* high = hi ;
2020-09-27 13:40:13 +08:00
md_bitmap_free ( bitmap ) ;
2017-03-01 16:42:39 +08:00
2014-06-07 00:36:26 -05:00
return rv ;
}
2018-08-01 15:20:50 -07:00
EXPORT_SYMBOL_GPL ( md_bitmap_copy_from_slot ) ;
2014-06-07 00:36:26 -05:00
2018-08-01 15:20:50 -07:00
void md_bitmap_status ( struct seq_file * seq , struct bitmap * bitmap )
2012-03-19 12:46:40 +11:00
{
unsigned long chunk_kb ;
2012-05-22 13:55:24 +10:00
struct bitmap_counts * counts ;
2012-03-19 12:46:40 +11:00
if ( ! bitmap )
return ;
2012-05-22 13:55:24 +10:00
counts = & bitmap - > counts ;
2012-03-19 12:46:40 +11:00
chunk_kb = bitmap - > mddev - > bitmap_info . chunksize > > 10 ;
seq_printf ( seq , " bitmap: %lu/%lu pages [%luKB], "
" %lu%s chunk " ,
2012-05-22 13:55:24 +10:00
counts - > pages - counts - > missing_pages ,
counts - > pages ,
( counts - > pages - counts - > missing_pages )
2012-03-19 12:46:40 +11:00
< < ( PAGE_SHIFT - 10 ) ,
chunk_kb ? chunk_kb : bitmap - > mddev - > bitmap_info . chunksize ,
chunk_kb ? " KB " : " B " ) ;
2012-05-22 13:55:10 +10:00
if ( bitmap - > storage . file ) {
2012-03-19 12:46:40 +11:00
seq_printf ( seq , " , file: " ) ;
2015-06-19 10:30:28 +02:00
seq_file_path ( seq , bitmap - > storage . file , " \t \n " ) ;
2012-03-19 12:46:40 +11:00
}
seq_printf ( seq , " \n " ) ;
}
2018-08-01 15:20:50 -07:00
/*
 * Set up or resize the in-memory counters and the storage of @bitmap.
 *
 * @blocks:    size to cover (md_bitmap_create() passes
 *             mddev->resync_max_sectors)
 * @chunksize: chunk size to use, or 0 to have a suitable one chosen
 * @init:      non-zero when setting up the initial bitmap; zero when
 *             resizing a live array, in which case the personality is
 *             quiesced around the switch-over
 *
 * Returns 0 on success, -EINVAL when asked to resize a file-based bitmap,
 * the error from md_bitmap_storage_alloc(), or -ENOMEM.
 */
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
		     int chunksize, int init)
{
	/* If chunk_size is 0, choose an appropriate chunk size.
	 * Then possibly allocate new storage space.
	 * Then quiesce, copy bits, replace bitmap, and re-start
	 *
	 * This function is called both to set up the initial bitmap
	 * and to resize the bitmap while the array is active.
	 * If this happens as a result of the array being resized,
	 * chunksize will be zero, and we need to choose a suitable
	 * chunksize, otherwise we use what we are given.
	 */
	struct bitmap_storage store;
	struct bitmap_counts old_counts;
	unsigned long chunks;
	sector_t block;
	sector_t old_blocks, new_blocks;
	int chunkshift;
	int ret = 0;
	long pages;
	struct bitmap_page *new_bp;

	if (bitmap->storage.file && !init) {
		pr_info(" md: cannot resize file-based bitmap \n ");
		return -EINVAL;
	}

	if (chunksize == 0) {
		/* If there is enough space, leave the chunk size unchanged,
		 * else increase by factor of two until there is enough space.
		 */
		long bytes;
		long space = bitmap->mddev->bitmap_info.space;

		if (space == 0) {
			/* We don't know how much space there is, so limit
			 * to current size - in sectors.
			 */
			bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
			space = DIV_ROUND_UP(bytes, 512);
			bitmap->mddev->bitmap_info.space = space;
		}
		chunkshift = bitmap->counts.chunkshift;
		chunkshift--;
		do {
			/* 'chunkshift' is shift from block size to chunk size */
			chunkshift++;
			chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
			bytes = DIV_ROUND_UP(chunks, 8);
			if (!bitmap->mddev->bitmap_info.external)
				bytes += sizeof(bitmap_super_t);
			/*
			 * Stop growing once the resulting chunk size would no
			 * longer fit the superblock's chunksize field.
			 */
		} while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) <
			(BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1));
	} else
		chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;

	chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
	memset(&store, 0, sizeof(store));
	if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
		ret = md_bitmap_storage_alloc(&store, chunks,
					      !bitmap->mddev->bitmap_info.external,
					      mddev_is_clustered(bitmap->mddev)
					      ? bitmap->cluster_slot : 0);
	if (ret) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);

	new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL);
	ret = -ENOMEM;
	if (!new_bp) {
		md_bitmap_file_unmap(&store);
		goto err;
	}

	if (!init)
		bitmap->mddev->pers->quiesce(bitmap->mddev, 1);

	/* the backing file, if any, moves over to the new storage */
	store.file = bitmap->storage.file;
	bitmap->storage.file = NULL;

	if (store.sb_page && bitmap->storage.sb_page)
		memcpy(page_address(store.sb_page),
		       page_address(bitmap->storage.sb_page),
		       sizeof(bitmap_super_t));

	/*
	 * Take counts.lock before unmapping the previous storage: the
	 * bitmap daemon can run even while the array is quiesced and
	 * accesses the storage under this lock.
	 */
	spin_lock_irq(&bitmap->counts.lock);
	md_bitmap_file_unmap(&bitmap->storage);
	bitmap->storage = store;

	old_counts = bitmap->counts;
	bitmap->counts.bp = new_bp;
	bitmap->counts.pages = pages;
	bitmap->counts.missing_pages = pages;
	bitmap->counts.chunkshift = chunkshift;
	bitmap->counts.chunks = chunks;
	bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift +
						     BITMAP_BLOCK_SHIFT);

	/* only the range covered by both old and new counters is copied */
	blocks = min(old_counts.chunks << old_counts.chunkshift,
		     chunks << chunkshift);

	/* For cluster raid, need to pre-allocate bitmap */
	if (mddev_is_clustered(bitmap->mddev)) {
		unsigned long page;

		for (page = 0; page < pages; page++) {
			ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1);
			if (ret) {
				unsigned long k;

				/* deallocate the page memory */
				for (k = 0; k < page; k++) {
					kfree(new_bp[k].map);
				}
				kfree(new_bp);

				/* restore some fields from old_counts */
				bitmap->counts.bp = old_counts.bp;
				bitmap->counts.pages = old_counts.pages;
				bitmap->counts.missing_pages = old_counts.pages;
				bitmap->counts.chunkshift = old_counts.chunkshift;
				bitmap->counts.chunks = old_counts.chunks;
				bitmap->mddev->bitmap_info.chunksize =
					1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT);
				blocks = old_counts.chunks << old_counts.chunkshift;
				/*
				 * NOTE(review): ret is reset to 0 before
				 * returning, so this failure is only logged
				 * and not reported to the caller - confirm
				 * that is intended.
				 */
				pr_warn(" Could not pre-allocate in-memory bitmap for cluster raid \n ");
				break;
			} else
				bitmap->counts.bp[page].count += 1;
		}
	}

	/* carry every "needed" counter of the old bitmap over to the new one */
	for (block = 0; block < blocks; ) {
		bitmap_counter_t *bmc_old, *bmc_new;
		int set;

		bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0);
		set = bmc_old && NEEDED(*bmc_old);

		if (set) {
			bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc_new) {
				if (*bmc_new == 0) {
					/* need to set on-disk bits too. */
					sector_t end = block + new_blocks;
					sector_t start = block >> chunkshift;

					start <<= chunkshift;
					/*
					 * Set the bit of every chunk this
					 * counter covers, i.e. step "start"
					 * through the range instead of
					 * setting the bit for "block" over
					 * and over.
					 */
					while (start < end) {
						md_bitmap_file_set_bit(bitmap, start);
						start += 1 << chunkshift;
					}
					*bmc_new = 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
				*bmc_new |= NEEDED_MASK;
			}
			if (new_blocks < old_blocks)
				old_blocks = new_blocks;
		}
		block += old_blocks;
	}

	/* free the old counter pages unless they were restored above */
	if (bitmap->counts.bp != old_counts.bp) {
		unsigned long k;

		for (k = 0; k < old_counts.pages; k++)
			if (!old_counts.bp[k].hijacked)
				kfree(old_counts.bp[k].map);
		kfree(old_counts.bp);
	}

	if (!init) {
		int i;

		while (block < (chunks << chunkshift)) {
			bitmap_counter_t *bmc;

			bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1);
			if (bmc) {
				/* new space.  It needs to be resynced, so
				 * we set NEEDED_MASK.
				 */
				if (*bmc == 0) {
					*bmc = NEEDED_MASK | 2;
					md_bitmap_count_page(&bitmap->counts, block, 1);
					md_bitmap_set_pending(&bitmap->counts, block);
				}
			}
			block += new_blocks;
		}
		/* mark every storage page dirty so it is written by the unplug below */
		for (i = 0; i < bitmap->storage.file_pages; i++)
			set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
	}
	spin_unlock_irq(&bitmap->counts.lock);

	if (!init) {
		md_bitmap_unplug(bitmap);
		bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
	}
	ret = 0;
err:
	return ret;
}
EXPORT_SYMBOL_GPL(md_bitmap_resize);
2012-05-22 13:55:25 +10:00
2009-12-14 12:49:55 +11:00
static ssize_t
2011-10-11 16:47:53 +11:00
location_show ( struct mddev * mddev , char * page )
2009-12-14 12:49:55 +11:00
{
ssize_t len ;
2010-06-01 19:37:31 +10:00
if ( mddev - > bitmap_info . file )
2009-12-14 12:49:55 +11:00
len = sprintf ( page , " file " ) ;
2010-06-01 19:37:31 +10:00
else if ( mddev - > bitmap_info . offset )
2009-12-14 12:49:55 +11:00
len = sprintf ( page , " %+lld " , ( long long ) mddev - > bitmap_info . offset ) ;
2010-06-01 19:37:31 +10:00
else
2009-12-14 12:49:55 +11:00
len = sprintf ( page , " none " ) ;
len + = sprintf ( page + len , " \n " ) ;
return len ;
}
static ssize_t
2011-10-11 16:47:53 +11:00
location_store ( struct mddev * mddev , const char * buf , size_t len )
2009-12-14 12:49:55 +11:00
{
2016-07-30 10:05:31 -07:00
int rv ;
2009-12-14 12:49:55 +11:00
2023-10-10 23:19:46 +08:00
rv = mddev_suspend_and_lock ( mddev ) ;
2016-07-30 10:05:31 -07:00
if ( rv )
return rv ;
2023-08-25 11:09:54 +08:00
2009-12-14 12:49:55 +11:00
if ( mddev - > pers ) {
2016-07-30 10:05:31 -07:00
if ( mddev - > recovery | | mddev - > sync_thread ) {
rv = - EBUSY ;
goto out ;
}
2009-12-14 12:49:55 +11:00
}
if ( mddev - > bitmap | | mddev - > bitmap_info . file | |
mddev - > bitmap_info . offset ) {
/* bitmap already configured. Only option is to clear it */
2016-07-30 10:05:31 -07:00
if ( strncmp ( buf , " none " , 4 ) ! = 0 ) {
rv = - EBUSY ;
goto out ;
}
2023-08-25 11:09:54 +08:00
md_bitmap_destroy ( mddev ) ;
2009-12-14 12:49:55 +11:00
mddev - > bitmap_info . offset = 0 ;
if ( mddev - > bitmap_info . file ) {
struct file * f = mddev - > bitmap_info . file ;
mddev - > bitmap_info . file = NULL ;
fput ( f ) ;
}
} else {
/* No bitmap, OK to set a location */
long long offset ;
2023-08-25 11:09:54 +08:00
struct bitmap * bitmap ;
2009-12-14 12:49:55 +11:00
if ( strncmp ( buf , " none " , 4 ) = = 0 )
/* nothing to be done */ ;
else if ( strncmp ( buf , " file: " , 5 ) = = 0 ) {
/* Not supported yet */
2016-07-30 10:05:31 -07:00
rv = - EINVAL ;
goto out ;
2009-12-14 12:49:55 +11:00
} else {
if ( buf [ 0 ] = = ' + ' )
2013-06-01 16:15:16 +09:00
rv = kstrtoll ( buf + 1 , 10 , & offset ) ;
2009-12-14 12:49:55 +11:00
else
2013-06-01 16:15:16 +09:00
rv = kstrtoll ( buf , 10 , & offset ) ;
2009-12-14 12:49:55 +11:00
if ( rv )
2016-07-30 10:05:31 -07:00
goto out ;
if ( offset = = 0 ) {
rv = - EINVAL ;
goto out ;
}
2009-12-14 12:49:56 +11:00
if ( mddev - > bitmap_info . external = = 0 & &
mddev - > major_version = = 0 & &
2016-07-30 10:05:31 -07:00
offset ! = mddev - > bitmap_info . default_offset ) {
rv = - EINVAL ;
goto out ;
}
2023-08-25 11:09:54 +08:00
2009-12-14 12:49:55 +11:00
mddev - > bitmap_info . offset = offset ;
2023-08-25 11:09:54 +08:00
bitmap = md_bitmap_create ( mddev , - 1 ) ;
if ( IS_ERR ( bitmap ) ) {
rv = PTR_ERR ( bitmap ) ;
goto out ;
}
mddev - > bitmap = bitmap ;
rv = md_bitmap_load ( mddev ) ;
if ( rv ) {
mddev - > bitmap_info . offset = 0 ;
md_bitmap_destroy ( mddev ) ;
goto out ;
2009-12-14 12:49:55 +11:00
}
}
}
if ( ! mddev - > external ) {
/* Ensure new bitmap info is stored in
* metadata promptly .
*/
2016-12-08 15:48:19 -08:00
set_bit ( MD_SB_CHANGE_DEVS , & mddev - > sb_flags ) ;
2009-12-14 12:49:55 +11:00
md_wakeup_thread ( mddev - > thread ) ;
}
2016-07-30 10:05:31 -07:00
rv = 0 ;
out :
2023-10-10 23:19:46 +08:00
mddev_unlock_and_resume ( mddev ) ;
2016-07-30 10:05:31 -07:00
if ( rv )
return rv ;
2009-12-14 12:49:55 +11:00
return len ;
}
/* "location" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry bitmap_location =
	__ATTR(location, 0644, location_show, location_store);
2012-05-22 13:55:07 +10:00
/* 'bitmap/space' is the space available at 'location' for the
* bitmap . This allows the kernel to know when it is safe to
* resize the bitmap to match a resized array .
*/
static ssize_t
space_show ( struct mddev * mddev , char * page )
{
return sprintf ( page , " %lu \n " , mddev - > bitmap_info . space ) ;
}
/*
 * Store handler for "space": records the number of sectors available for
 * the bitmap.
 *
 * Rejects non-numeric input (kstrtoul error), zero (-EINVAL), and any value
 * smaller than the current bitmap's storage rounded up to 512-byte sectors
 * (-EFBIG).  Returns @len on success.
 */
static ssize_t
space_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long sectors;
	int rv;

	rv = kstrtoul(buf, 10, &sectors);
	if (rv)
		return rv;

	if (!sectors)
		return -EINVAL;

	if (mddev->bitmap) {
		/* bytes -> 512-byte sectors, rounding up */
		unsigned long needed =
			(mddev->bitmap->storage.bytes + 511) >> 9;

		if (sectors < needed)
			return -EFBIG; /* Bitmap is too big for this small space */
	}

	/*
	 * could make sure it isn't too big, but that isn't really
	 * needed - user-space should be careful.
	 */
	mddev->bitmap_info.space = sectors;
	return len;
}
/* "space" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry bitmap_space =
	__ATTR(space, 0644, space_show, space_store);
2009-12-14 12:49:55 +11:00
static ssize_t
2011-10-11 16:47:53 +11:00
timeout_show ( struct mddev * mddev , char * page )
2009-12-14 12:49:55 +11:00
{
ssize_t len ;
unsigned long secs = mddev - > bitmap_info . daemon_sleep / HZ ;
unsigned long jifs = mddev - > bitmap_info . daemon_sleep % HZ ;
2010-06-01 19:37:31 +10:00
2009-12-14 12:49:55 +11:00
len = sprintf ( page , " %lu " , secs ) ;
if ( jifs )
len + = sprintf ( page + len , " .%03u " , jiffies_to_msecs ( jifs ) ) ;
len + = sprintf ( page + len , " \n " ) ;
return len ;
}
static ssize_t
2011-10-11 16:47:53 +11:00
timeout_store ( struct mddev * mddev , const char * buf , size_t len )
2009-12-14 12:49:55 +11:00
{
/* timeout can be set at any time */
unsigned long timeout ;
int rv = strict_strtoul_scaled ( buf , & timeout , 4 ) ;
if ( rv )
return rv ;
/* just to make sure we don't overflow... */
if ( timeout > = LONG_MAX / HZ )
return - EINVAL ;
timeout = timeout * HZ / 10000 ;
if ( timeout > = MAX_SCHEDULE_TIMEOUT )
timeout = MAX_SCHEDULE_TIMEOUT - 1 ;
if ( timeout < 1 )
timeout = 1 ;
2023-05-23 10:10:15 +08:00
md/bitmap: factor out a helper to set timeout
Register/unregister 'mddev->thread' are both under 'reconfig_mutex',
however, some context didn't hold the mutex to access mddev->thread,
which can cause null-ptr-deference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, deference 'mddev->thread' might cause
null-ptr-deference, because md_unregister_thread() reset the pointer
before stopping the thread.
2) timeout_store() access 'mddev->thread' multiple times,
null-ptr-deference can be triggered if 'mddev->thread' is reset in the
middle.
This patch factor out a helper to set timeout, the new helper always
check if 'mddev->thread' is null first, so that problem 1 can be fixed.
Now that this helper only access 'mddev->thread' once, but it's possible
that 'mddev->thread' can be freed while this helper is still in progress,
hence the problem is not fixed yet. Follow up patches will fix this by
protecting md_thread with rcu.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
2023-05-23 10:10:16 +08:00
mddev - > bitmap_info . daemon_sleep = timeout ;
mddev_set_timeout ( mddev , timeout , false ) ;
2023-05-23 10:10:15 +08:00
md_wakeup_thread ( mddev - > thread ) ;
md/bitmap: factor out a helper to set timeout
Register/unregister 'mddev->thread' are both under 'reconfig_mutex',
however, some context didn't hold the mutex to access mddev->thread,
which can cause null-ptr-deference:
1) md_bitmap_daemon_work() can be called from md_check_recovery() where
'reconfig_mutex' is not held, deference 'mddev->thread' might cause
null-ptr-deference, because md_unregister_thread() reset the pointer
before stopping the thread.
2) timeout_store() access 'mddev->thread' multiple times,
null-ptr-deference can be triggered if 'mddev->thread' is reset in the
middle.
This patch factor out a helper to set timeout, the new helper always
check if 'mddev->thread' is null first, so that problem 1 can be fixed.
Now that this helper only access 'mddev->thread' once, but it's possible
that 'mddev->thread' can be freed while this helper is still in progress,
hence the problem is not fixed yet. Follow up patches will fix this by
protecting md_thread with rcu.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230523021017.3048783-5-yukuai1@huaweicloud.com
2023-05-23 10:10:16 +08:00
2009-12-14 12:49:55 +11:00
return len ;
}
/* "time_base" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry bitmap_timeout =
	__ATTR(time_base, 0644, timeout_show, timeout_store);
static ssize_t
2011-10-11 16:47:53 +11:00
backlog_show ( struct mddev * mddev , char * page )
2009-12-14 12:49:55 +11:00
{
return sprintf ( page , " %lu \n " , mddev - > bitmap_info . max_write_behind ) ;
}
static ssize_t
2011-10-11 16:47:53 +11:00
backlog_store ( struct mddev * mddev , const char * buf , size_t len )
2009-12-14 12:49:55 +11:00
{
unsigned long backlog ;
2019-06-14 17:10:37 +08:00
unsigned long old_mwb = mddev - > bitmap_info . max_write_behind ;
2021-10-17 21:50:17 +08:00
struct md_rdev * rdev ;
bool has_write_mostly = false ;
2013-06-01 16:15:16 +09:00
int rv = kstrtoul ( buf , 10 , & backlog ) ;
2009-12-14 12:49:55 +11:00
if ( rv )
return rv ;
if ( backlog > COUNTER_MAX )
return - EINVAL ;
2021-10-17 21:50:17 +08:00
2023-10-10 23:19:52 +08:00
rv = mddev_suspend_and_lock ( mddev ) ;
2023-07-06 16:37:27 +08:00
if ( rv )
return rv ;
2021-10-17 21:50:17 +08:00
/*
* Without write mostly device , it doesn ' t make sense to set
* backlog for max_write_behind .
*/
rdev_for_each ( rdev , mddev ) {
if ( test_bit ( WriteMostly , & rdev - > flags ) ) {
has_write_mostly = true ;
break ;
}
}
if ( ! has_write_mostly ) {
pr_warn_ratelimited ( " %s: can't set backlog, no write mostly device available \n " ,
mdname ( mddev ) ) ;
2023-07-06 16:37:27 +08:00
mddev_unlock ( mddev ) ;
2021-10-17 21:50:17 +08:00
return - EINVAL ;
}
2009-12-14 12:49:55 +11:00
mddev - > bitmap_info . max_write_behind = backlog ;
2019-12-23 10:48:53 +01:00
if ( ! backlog & & mddev - > serial_info_pool ) {
/* serial_info_pool is not needed if backlog is zero */
2019-12-23 10:49:00 +01:00
if ( ! mddev - > serialize_policy )
2023-10-10 23:19:53 +08:00
mddev_destroy_serial_pool ( mddev , NULL ) ;
2019-12-23 10:48:53 +01:00
} else if ( backlog & & ! mddev - > serial_info_pool ) {
/* serial_info_pool is needed since backlog is not zero */
2019-06-14 17:10:37 +08:00
rdev_for_each ( rdev , mddev )
2023-10-10 23:19:53 +08:00
mddev_create_serial_pool ( mddev , rdev ) ;
2019-06-14 17:10:37 +08:00
}
if ( old_mwb ! = backlog )
md_bitmap_update_sb ( mddev - > bitmap ) ;
2023-07-06 16:37:27 +08:00
2023-10-10 23:19:52 +08:00
mddev_unlock_and_resume ( mddev ) ;
2009-12-14 12:49:55 +11:00
return len ;
}
/* "backlog" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry bitmap_backlog =
	__ATTR(backlog, 0644, backlog_show, backlog_store);
static ssize_t
2011-10-11 16:47:53 +11:00
chunksize_show ( struct mddev * mddev , char * page )
2009-12-14 12:49:55 +11:00
{
return sprintf ( page , " %lu \n " , mddev - > bitmap_info . chunksize ) ;
}
static ssize_t
2011-10-11 16:47:53 +11:00
chunksize_store ( struct mddev * mddev , const char * buf , size_t len )
2009-12-14 12:49:55 +11:00
{
/* Can only be changed when no bitmap is active */
int rv ;
unsigned long csize ;
if ( mddev - > bitmap )
return - EBUSY ;
2013-06-01 16:15:16 +09:00
rv = kstrtoul ( buf , 10 , & csize ) ;
2009-12-14 12:49:55 +11:00
if ( rv )
return rv ;
if ( csize < 512 | |
! is_power_of_2 ( csize ) )
return - EINVAL ;
2022-10-25 09:37:05 +02:00
if ( BITS_PER_LONG > 32 & & csize > = ( 1ULL < < ( BITS_PER_BYTE *
sizeof ( ( ( bitmap_super_t * ) 0 ) - > chunksize ) ) ) )
return - EOVERFLOW ;
2009-12-14 12:49:55 +11:00
mddev - > bitmap_info . chunksize = csize ;
return len ;
}
/* "chunksize" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry bitmap_chunksize =
	__ATTR(chunksize, 0644, chunksize_show, chunksize_store);
2011-10-11 16:47:53 +11:00
static ssize_t metadata_show ( struct mddev * mddev , char * page )
2009-12-14 12:49:56 +11:00
{
2014-03-29 10:20:02 -05:00
if ( mddev_is_clustered ( mddev ) )
return sprintf ( page , " clustered \n " ) ;
2009-12-14 12:49:56 +11:00
return sprintf ( page , " %s \n " , ( mddev - > bitmap_info . external
? " external " : " internal " ) ) ;
}
2011-10-11 16:47:53 +11:00
static ssize_t metadata_store ( struct mddev * mddev , const char * buf , size_t len )
2009-12-14 12:49:56 +11:00
{
if ( mddev - > bitmap | |
mddev - > bitmap_info . file | |
mddev - > bitmap_info . offset )
return - EBUSY ;
if ( strncmp ( buf , " external " , 8 ) = = 0 )
mddev - > bitmap_info . external = 1 ;
2014-03-29 10:20:02 -05:00
else if ( ( strncmp ( buf , " internal " , 8 ) = = 0 ) | |
( strncmp ( buf , " clustered " , 9 ) = = 0 ) )
2009-12-14 12:49:56 +11:00
mddev - > bitmap_info . external = 0 ;
else
return - EINVAL ;
return len ;
}
/* "metadata" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry bitmap_metadata =
	__ATTR(metadata, 0644, metadata_show, metadata_store);
2011-10-11 16:47:53 +11:00
static ssize_t can_clear_show ( struct mddev * mddev , char * page )
2009-12-14 12:49:56 +11:00
{
int len ;
2014-12-15 12:56:59 +11:00
spin_lock ( & mddev - > lock ) ;
2009-12-14 12:49:56 +11:00
if ( mddev - > bitmap )
len = sprintf ( page , " %s \n " , ( mddev - > bitmap - > need_sync ?
" false " : " true " ) ) ;
else
len = sprintf ( page , " \n " ) ;
2014-12-15 12:56:59 +11:00
spin_unlock ( & mddev - > lock ) ;
2009-12-14 12:49:56 +11:00
return len ;
}
2011-10-11 16:47:53 +11:00
static ssize_t can_clear_store ( struct mddev * mddev , const char * buf , size_t len )
2009-12-14 12:49:56 +11:00
{
if ( mddev - > bitmap = = NULL )
return - ENOENT ;
if ( strncmp ( buf , " false " , 5 ) = = 0 )
mddev - > bitmap - > need_sync = 1 ;
else if ( strncmp ( buf , " true " , 4 ) = = 0 ) {
if ( mddev - > degraded )
return - EBUSY ;
mddev - > bitmap - > need_sync = 0 ;
} else
return - EINVAL ;
return len ;
}
/* "can_clear" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry bitmap_can_clear =
	__ATTR(can_clear, 0644, can_clear_show, can_clear_store);
2010-03-08 16:02:37 +11:00
static ssize_t
2011-10-11 16:47:53 +11:00
behind_writes_used_show ( struct mddev * mddev , char * page )
2010-03-08 16:02:37 +11:00
{
2014-12-15 12:56:59 +11:00
ssize_t ret ;
spin_lock ( & mddev - > lock ) ;
2010-03-08 16:02:37 +11:00
if ( mddev - > bitmap = = NULL )
2014-12-15 12:56:59 +11:00
ret = sprintf ( page , " 0 \n " ) ;
else
ret = sprintf ( page , " %lu \n " ,
mddev - > bitmap - > behind_writes_used ) ;
spin_unlock ( & mddev - > lock ) ;
return ret ;
2010-03-08 16:02:37 +11:00
}
static ssize_t
2011-10-11 16:47:53 +11:00
behind_writes_used_reset ( struct mddev * mddev , const char * buf , size_t len )
2010-03-08 16:02:37 +11:00
{
if ( mddev - > bitmap )
mddev - > bitmap - > behind_writes_used = 0 ;
return len ;
}
/* "max_backlog_used" attribute: world-readable, owner-writable (0644). */
static struct md_sysfs_entry max_backlog_used =
	__ATTR(max_backlog_used, 0644,
	       behind_writes_used_show, behind_writes_used_reset);
2009-12-14 12:49:55 +11:00
static struct attribute * md_bitmap_attrs [ ] = {
& bitmap_location . attr ,
2012-05-22 13:55:07 +10:00
& bitmap_space . attr ,
2009-12-14 12:49:55 +11:00
& bitmap_timeout . attr ,
& bitmap_backlog . attr ,
& bitmap_chunksize . attr ,
2009-12-14 12:49:56 +11:00
& bitmap_metadata . attr ,
& bitmap_can_clear . attr ,
2010-03-08 16:02:37 +11:00
& max_backlog_used . attr ,
2009-12-14 12:49:55 +11:00
NULL
} ;
2021-05-29 12:30:49 +02:00
const struct attribute_group md_bitmap_group = {
2009-12-14 12:49:55 +11:00
. name = " bitmap " ,
. attrs = md_bitmap_attrs ,
} ;