2018-09-04 15:46:30 -07:00
// SPDX-License-Identifier: GPL-2.0+
2009-04-06 19:01:35 -07:00
/*
2021-11-08 18:35:01 -08:00
* NILFS module and super block management .
2009-04-06 19:01:35 -07:00
*
* Copyright ( C ) 2005 - 2008 Nippon Telegraph and Telephone Corporation .
*
2016-05-23 16:23:09 -07:00
* Written by Ryusuke Konishi .
2009-04-06 19:01:35 -07:00
*/
/*
* linux / fs / ext2 / super . c
*
* Copyright ( C ) 1992 , 1993 , 1994 , 1995
* Remy Card ( card @ masi . ibp . fr )
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie ( Paris VI )
*
* from
*
* linux / fs / minix / inode . c
*
* Copyright ( C ) 1991 , 1992 Linus Torvalds
*
* Big - endian to little - endian byte - swapping / bitmaps by
* David S . Miller ( davem @ caip . rutgers . edu ) , 1995
*/
# include <linux/module.h>
# include <linux/string.h>
# include <linux/slab.h>
# include <linux/init.h>
# include <linux/blkdev.h>
# include <linux/crc32.h>
# include <linux/vfs.h>
# include <linux/writeback.h>
2009-06-24 20:06:34 +09:00
# include <linux/seq_file.h>
# include <linux/mount.h>
2023-08-02 17:41:21 +02:00
# include <linux/fs_context.h>
2024-04-25 03:27:16 +09:00
# include <linux/fs_parser.h>
2009-04-06 19:01:35 -07:00
# include "nilfs.h"
2010-08-27 00:23:02 +09:00
# include "export.h"
2009-04-06 19:01:35 -07:00
# include "mdt.h"
# include "alloc.h"
2010-07-10 20:52:09 +09:00
# include "btree.h"
# include "btnode.h"
2009-04-06 19:01:35 -07:00
# include "page.h"
# include "cpfile.h"
2011-05-05 01:23:58 +09:00
# include "sufile.h" /* nilfs_sufile_resize(), nilfs_sufile_set_alloc_range() */
2009-04-06 19:01:35 -07:00
# include "ifile.h"
# include "dat.h"
# include "segment.h"
# include "segbuf.h"
/* Module metadata registered through the MODULE_* macros. */
MODULE_AUTHOR ( " NTT Corp. " ) ;
MODULE_DESCRIPTION ( " A New Implementation of the Log-structured Filesystem "
" (NILFS) " ) ;
MODULE_LICENSE ( " GPL " ) ;
2010-10-08 22:37:27 +09:00
/*
 * Slab cache that nilfs_alloc_inode() allocates struct nilfs_inode_info
 * objects from and nilfs_free_inode() returns them to.  File-local.
 */
static struct kmem_cache * nilfs_inode_cachep ;
2010-04-06 00:54:11 +08:00
/*
 * Slab caches with external linkage.  They are not used in the part of the
 * file visible here; presumably they back transaction, segment-buffer and
 * B-tree path objects respectively -- TODO confirm against their users.
 */
struct kmem_cache * nilfs_transaction_cachep ;
struct kmem_cache * nilfs_segbuf_cachep ;
struct kmem_cache * nilfs_btree_path_cache ;
2011-03-09 11:05:08 +09:00
/* Forward declaration; the definition is not in this part of the file. */
static int nilfs_setup_super ( struct super_block * sb , int is_mount ) ;
2009-04-06 19:01:35 -07:00
2020-08-11 18:35:46 -07:00
void __nilfs_msg ( struct super_block * sb , const char * fmt , . . . )
2016-08-02 14:05:02 -07:00
{
struct va_format vaf ;
va_list args ;
2020-08-11 18:35:46 -07:00
int level ;
2016-08-02 14:05:02 -07:00
va_start ( args , fmt ) ;
2020-08-11 18:35:46 -07:00
level = printk_get_level ( fmt ) ;
vaf . fmt = printk_skip_level ( fmt ) ;
2016-08-02 14:05:02 -07:00
vaf . va = & args ;
2020-08-11 18:35:46 -07:00
2016-08-02 14:05:02 -07:00
if ( sb )
2020-08-11 18:35:46 -07:00
printk ( " %c%cNILFS (%s): %pV \n " ,
KERN_SOH_ASCII , level , sb - > s_id , & vaf ) ;
2016-08-02 14:05:02 -07:00
else
2020-08-11 18:35:46 -07:00
printk ( " %c%cNILFS: %pV \n " ,
KERN_SOH_ASCII , level , & vaf ) ;
2016-08-02 14:05:02 -07:00
va_end ( args ) ;
}
2011-03-09 11:05:08 +09:00
static void nilfs_set_error ( struct super_block * sb )
2010-06-28 17:49:30 +09:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-06-28 17:49:32 +09:00
struct nilfs_super_block * * sbp ;
2010-06-28 17:49:30 +09:00
down_write ( & nilfs - > ns_sem ) ;
if ( ! ( nilfs - > ns_mount_state & NILFS_ERROR_FS ) ) {
nilfs - > ns_mount_state | = NILFS_ERROR_FS ;
2011-03-09 11:05:08 +09:00
sbp = nilfs_prepare_super ( sb , 0 ) ;
2010-06-28 17:49:32 +09:00
if ( likely ( sbp ) ) {
sbp [ 0 ] - > s_state | = cpu_to_le16 ( NILFS_ERROR_FS ) ;
2010-06-28 17:49:33 +09:00
if ( sbp [ 1 ] )
sbp [ 1 ] - > s_state | = cpu_to_le16 ( NILFS_ERROR_FS ) ;
2011-03-09 11:05:08 +09:00
nilfs_commit_super ( sb , NILFS_SB_COMMIT_ALL ) ;
2010-06-28 17:49:32 +09:00
}
2010-06-28 17:49:30 +09:00
}
up_write ( & nilfs - > ns_sem ) ;
}
2009-04-06 19:01:35 -07:00
/**
2016-08-02 14:05:00 -07:00
* __nilfs_error ( ) - report failure condition on a filesystem
2009-04-06 19:01:35 -07:00
*
2016-08-02 14:05:00 -07:00
* __nilfs_error ( ) sets an ERROR_FS flag on the superblock as well as
* reporting an error message . This function should be called when
* NILFS detects incoherences or defects of meta data on disk .
2009-04-06 19:01:35 -07:00
*
2016-08-02 14:05:00 -07:00
* This implements the body of nilfs_error ( ) macro . Normally ,
* nilfs_error ( ) should be used . As for sustainable errors such as a
2020-08-11 18:35:49 -07:00
* single - shot I / O error , nilfs_err ( ) should be used instead .
2016-08-02 14:05:00 -07:00
*
* Callers should not add a trailing newline since this will do it .
2009-04-06 19:01:35 -07:00
*/
2016-08-02 14:05:00 -07:00
void __nilfs_error ( struct super_block * sb , const char * function ,
const char * fmt , . . . )
2009-04-06 19:01:35 -07:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-11-09 16:35:21 -08:00
struct va_format vaf ;
2009-04-06 19:01:35 -07:00
va_list args ;
va_start ( args , fmt ) ;
2010-11-09 16:35:21 -08:00
vaf . fmt = fmt ;
vaf . va = & args ;
printk ( KERN_CRIT " NILFS error (device %s): %s: %pV \n " ,
sb - > s_id , function , & vaf ) ;
2009-04-06 19:01:35 -07:00
va_end ( args ) ;
2017-07-17 08:45:34 +01:00
if ( ! sb_rdonly ( sb ) ) {
2011-03-09 11:05:08 +09:00
nilfs_set_error ( sb ) ;
2009-04-06 19:01:35 -07:00
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , ERRORS_RO ) ) {
2009-04-06 19:01:35 -07:00
printk ( KERN_CRIT " Remounting filesystem read-only \n " ) ;
2017-11-27 13:05:09 -08:00
sb - > s_flags | = SB_RDONLY ;
2009-04-06 19:01:35 -07:00
}
}
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , ERRORS_PANIC ) )
2009-04-06 19:01:35 -07:00
panic ( " NILFS (device %s): panic forced after error \n " ,
sb - > s_id ) ;
}
2010-09-05 13:35:53 +09:00
struct inode * nilfs_alloc_inode ( struct super_block * sb )
2009-04-06 19:01:35 -07:00
{
struct nilfs_inode_info * ii ;
2022-03-22 14:41:03 -07:00
ii = alloc_inode_sb ( sb , nilfs_inode_cachep , GFP_NOFS ) ;
2009-04-06 19:01:35 -07:00
if ( ! ii )
return NULL ;
ii - > i_bh = NULL ;
ii - > i_state = 0 ;
2010-08-20 21:20:29 +09:00
ii - > i_cno = 0 ;
2022-04-01 11:28:18 -07:00
ii - > i_assoc_inode = NULL ;
ii - > i_bmap = & ii - > i_bmap_data ;
2009-04-06 19:01:35 -07:00
return & ii - > vfs_inode ;
}
2019-04-15 20:23:38 -04:00
static void nilfs_free_inode ( struct inode * inode )
2009-04-06 19:01:35 -07:00
{
2016-05-23 16:23:20 -07:00
if ( nilfs_is_metadata_file_inode ( inode ) )
nilfs_mdt_destroy ( inode ) ;
2009-04-06 19:01:35 -07:00
kmem_cache_free ( nilfs_inode_cachep , NILFS_I ( inode ) ) ;
}
2011-03-09 11:05:08 +09:00
static int nilfs_sync_super ( struct super_block * sb , int flag )
2009-04-06 19:01:35 -07:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2009-04-06 19:01:35 -07:00
int err ;
retry :
2009-04-06 19:01:59 -07:00
set_buffer_dirty ( nilfs - > ns_sbh [ 0 ] ) ;
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , BARRIER ) ) {
2010-08-11 17:05:45 +02:00
err = __sync_dirty_buffer ( nilfs - > ns_sbh [ 0 ] ,
2016-11-01 07:40:10 -06:00
REQ_SYNC | REQ_PREFLUSH | REQ_FUA ) ;
2010-08-11 17:05:45 +02:00
} else {
err = sync_dirty_buffer ( nilfs - > ns_sbh [ 0 ] ) ;
2009-04-06 19:01:35 -07:00
}
2010-08-11 17:05:45 +02:00
2009-04-06 19:01:59 -07:00
if ( unlikely ( err ) ) {
2020-08-11 18:35:49 -07:00
nilfs_err ( sb , " unable to write superblock: err=%d " , err ) ;
2009-04-06 19:01:59 -07:00
if ( err = = - EIO & & nilfs - > ns_sbh [ 1 ] ) {
2010-06-28 17:49:33 +09:00
/*
* sbp [ 0 ] points to newer log than sbp [ 1 ] ,
* so copy sbp [ 0 ] to sbp [ 1 ] to take over sbp [ 0 ] .
*/
memcpy ( nilfs - > ns_sbp [ 1 ] , nilfs - > ns_sbp [ 0 ] ,
nilfs - > ns_sbsize ) ;
2009-04-06 19:01:59 -07:00
nilfs_fall_back_super_block ( nilfs ) ;
goto retry ;
}
} else {
struct nilfs_super_block * sbp = nilfs - > ns_sbp [ 0 ] ;
2010-06-28 17:49:33 +09:00
nilfs - > ns_sbwcount + + ;
2009-04-06 19:01:59 -07:00
/*
* The latest segment becomes trailable from the position
* written in superblock .
*/
2009-04-06 19:01:35 -07:00
clear_nilfs_discontinued ( nilfs ) ;
2009-04-06 19:01:59 -07:00
/* update GC protection for recent segments */
if ( nilfs - > ns_sbh [ 1 ] ) {
2010-06-28 17:49:33 +09:00
if ( flag = = NILFS_SB_COMMIT_ALL ) {
2009-04-06 19:01:59 -07:00
set_buffer_dirty ( nilfs - > ns_sbh [ 1 ] ) ;
2010-06-28 17:49:33 +09:00
if ( sync_dirty_buffer ( nilfs - > ns_sbh [ 1 ] ) < 0 )
goto out ;
2009-04-06 19:01:59 -07:00
}
2010-06-28 17:49:33 +09:00
if ( le64_to_cpu ( nilfs - > ns_sbp [ 1 ] - > s_last_cno ) <
le64_to_cpu ( nilfs - > ns_sbp [ 0 ] - > s_last_cno ) )
sbp = nilfs - > ns_sbp [ 1 ] ;
2009-04-06 19:01:59 -07:00
}
2009-04-06 19:01:35 -07:00
2010-06-28 17:49:33 +09:00
spin_lock ( & nilfs - > ns_last_segment_lock ) ;
nilfs - > ns_prot_seq = le64_to_cpu ( sbp - > s_last_seq ) ;
spin_unlock ( & nilfs - > ns_last_segment_lock ) ;
}
out :
2009-04-06 19:01:35 -07:00
return err ;
}
2010-06-28 17:49:31 +09:00
void nilfs_set_log_cursor ( struct nilfs_super_block * sbp ,
struct the_nilfs * nilfs )
{
sector_t nfreeblocks ;
/* nilfs->ns_sem must be locked by the caller. */
nilfs_count_free_blocks ( nilfs , & nfreeblocks ) ;
sbp - > s_free_blocks_count = cpu_to_le64 ( nfreeblocks ) ;
spin_lock ( & nilfs - > ns_last_segment_lock ) ;
sbp - > s_last_seq = cpu_to_le64 ( nilfs - > ns_last_seq ) ;
sbp - > s_last_pseg = cpu_to_le64 ( nilfs - > ns_last_pseg ) ;
sbp - > s_last_cno = cpu_to_le64 ( nilfs - > ns_last_cno ) ;
spin_unlock ( & nilfs - > ns_last_segment_lock ) ;
}
2011-03-09 11:05:08 +09:00
struct nilfs_super_block * * nilfs_prepare_super ( struct super_block * sb ,
2010-06-28 17:49:33 +09:00
int flip )
2009-04-06 19:01:35 -07:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2009-04-06 19:01:59 -07:00
struct nilfs_super_block * * sbp = nilfs - > ns_sbp ;
2009-04-06 19:01:35 -07:00
2010-06-28 17:49:32 +09:00
/* nilfs->ns_sem must be locked by the caller. */
2010-05-01 10:07:07 +09:00
if ( sbp [ 0 ] - > s_magic ! = cpu_to_le16 ( NILFS_SUPER_MAGIC ) ) {
2010-06-28 17:49:32 +09:00
if ( sbp [ 1 ] & &
sbp [ 1 ] - > s_magic = = cpu_to_le16 ( NILFS_SUPER_MAGIC ) ) {
2010-06-28 17:49:33 +09:00
memcpy ( sbp [ 0 ] , sbp [ 1 ] , nilfs - > ns_sbsize ) ;
2010-06-28 17:49:32 +09:00
} else {
2020-08-11 18:35:49 -07:00
nilfs_crit ( sb , " superblock broke " ) ;
2010-06-28 17:49:32 +09:00
return NULL ;
2009-04-06 19:01:59 -07:00
}
2010-06-28 17:49:33 +09:00
} else if ( sbp [ 1 ] & &
sbp [ 1 ] - > s_magic ! = cpu_to_le16 ( NILFS_SUPER_MAGIC ) ) {
2016-05-23 16:23:36 -07:00
memcpy ( sbp [ 1 ] , sbp [ 0 ] , nilfs - > ns_sbsize ) ;
2009-04-06 19:01:59 -07:00
}
2010-06-28 17:49:33 +09:00
if ( flip & & sbp [ 1 ] )
nilfs_swap_super_block ( nilfs ) ;
2010-06-28 17:49:32 +09:00
return sbp ;
}
2011-03-09 11:05:08 +09:00
int nilfs_commit_super ( struct super_block * sb , int flag )
2010-06-28 17:49:32 +09:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-06-28 17:49:32 +09:00
struct nilfs_super_block * * sbp = nilfs - > ns_sbp ;
2018-02-06 15:39:21 -08:00
time64_t t ;
2010-06-28 17:49:32 +09:00
/* nilfs->ns_sem must be locked by the caller. */
2018-02-06 15:39:21 -08:00
t = ktime_get_real_seconds ( ) ;
2010-06-28 17:49:33 +09:00
nilfs - > ns_sbwtime = t ;
2009-04-06 19:01:59 -07:00
sbp [ 0 ] - > s_wtime = cpu_to_le64 ( t ) ;
sbp [ 0 ] - > s_sum = 0 ;
sbp [ 0 ] - > s_sum = cpu_to_le32 ( crc32_le ( nilfs - > ns_crc_seed ,
( unsigned char * ) sbp [ 0 ] ,
nilfs - > ns_sbsize ) ) ;
2010-06-28 17:49:33 +09:00
if ( flag = = NILFS_SB_COMMIT_ALL & & sbp [ 1 ] ) {
sbp [ 1 ] - > s_wtime = sbp [ 0 ] - > s_wtime ;
sbp [ 1 ] - > s_sum = 0 ;
sbp [ 1 ] - > s_sum = cpu_to_le32 ( crc32_le ( nilfs - > ns_crc_seed ,
( unsigned char * ) sbp [ 1 ] ,
nilfs - > ns_sbsize ) ) ;
2009-04-06 19:01:59 -07:00
}
2009-12-09 00:57:52 +09:00
clear_nilfs_sb_dirty ( nilfs ) ;
2014-10-13 15:53:20 -07:00
nilfs - > ns_flushed_device = 1 ;
/* make sure store to ns_flushed_device cannot be reordered */
smp_wmb ( ) ;
2011-03-09 11:05:08 +09:00
return nilfs_sync_super ( sb , flag ) ;
2009-04-06 19:01:35 -07:00
}
2010-06-28 17:49:29 +09:00
/**
* nilfs_cleanup_super ( ) - write filesystem state for cleanup
2011-03-09 11:05:08 +09:00
* @ sb : super block instance to be unmounted or degraded to read - only
2010-06-28 17:49:29 +09:00
*
* This function restores state flags in the on - disk super block .
* This will set " clean " flag ( i . e . NILFS_VALID_FS ) unless the
* filesystem was not clean previously .
*/
2011-03-09 11:05:08 +09:00
int nilfs_cleanup_super ( struct super_block * sb )
2010-06-28 17:49:29 +09:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-06-28 17:49:32 +09:00
struct nilfs_super_block * * sbp ;
2010-06-28 17:49:33 +09:00
int flag = NILFS_SB_COMMIT ;
2010-06-28 17:49:32 +09:00
int ret = - EIO ;
2010-06-28 17:49:29 +09:00
2011-03-09 11:05:08 +09:00
sbp = nilfs_prepare_super ( sb , 0 ) ;
2010-06-28 17:49:32 +09:00
if ( sbp ) {
2011-03-09 11:05:08 +09:00
sbp [ 0 ] - > s_state = cpu_to_le16 ( nilfs - > ns_mount_state ) ;
nilfs_set_log_cursor ( sbp [ 0 ] , nilfs ) ;
2010-06-28 17:49:33 +09:00
if ( sbp [ 1 ] & & sbp [ 0 ] - > s_last_cno = = sbp [ 1 ] - > s_last_cno ) {
/*
* make the " clean " flag also to the opposite
* super block if both super blocks point to
* the same checkpoint .
*/
sbp [ 1 ] - > s_state = sbp [ 0 ] - > s_state ;
flag = NILFS_SB_COMMIT_ALL ;
}
2011-03-09 11:05:08 +09:00
ret = nilfs_commit_super ( sb , flag ) ;
2010-06-28 17:49:32 +09:00
}
2010-06-28 17:49:29 +09:00
return ret ;
}
2011-05-05 01:23:57 +09:00
/**
 * nilfs_move_2nd_super - relocate secondary super block
 * @sb: super block instance
 * @sb2off: new offset of the secondary super block (in bytes)
 *
 * Obtains the buffer for the block containing @sb2off, fills it with the
 * current secondary super block (or with zeroes when there is none) and
 * installs it in nilfs->ns_sbh[] / nilfs->ns_sbp[].
 *
 * nilfs->ns_sem must be locked by the caller.
 *
 * Return: 0 on success or if the location is unchanged, -EIO if the new
 * buffer could not be obtained.
 */
static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct buffer_head *new_bh;
	struct nilfs_super_block *new_sbp;
	sector_t cur_blk, new_blk;
	unsigned long in_blk_off;
	int idx;	/* array index of the secondary superblock */

	/*
	 * The secondary super block is the one whose buffer lies beyond the
	 * first data block; it may sit in either slot of the array.
	 */
	if (nilfs->ns_sbh[1] &&
	    nilfs->ns_sbh[1]->b_blocknr > nilfs->ns_first_data_block) {
		idx = 1;
		cur_blk = nilfs->ns_sbh[1]->b_blocknr;
	} else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) {
		idx = 0;
		cur_blk = nilfs->ns_sbh[0]->b_blocknr;
	} else {
		idx = -1;
		cur_blk = 0;
	}

	if (idx >= 0 &&
	    ((u64)cur_blk << nilfs->ns_blocksize_bits) == sb2off)
		return 0;	/* super block location is unchanged */

	/* Get new super block buffer */
	new_blk = sb2off >> nilfs->ns_blocksize_bits;
	in_blk_off = sb2off & (nilfs->ns_blocksize - 1);
	new_bh = sb_getblk(sb, new_blk);
	if (!new_bh) {
		nilfs_warn(sb,
			   " unable to move secondary superblock to block %llu ",
			   (unsigned long long)new_blk);
		return -EIO;
	}
	new_sbp = (void *)new_bh->b_data + in_blk_off;

	/*
	 * Fill the whole buffer and mark it uptodate under the buffer lock,
	 * so that a concurrent device read of the same block cannot
	 * overwrite the data stored here before it is written out.
	 */
	lock_buffer(new_bh);
	if (idx >= 0) {
		/*
		 * The position of the second superblock only changes by
		 * 4KiB, which is larger than the maximum superblock data
		 * size (= 1KiB), so there is no need to use memmove() to
		 * allow overlap between source and destination.
		 */
		memcpy(new_sbp, nilfs->ns_sbp[idx], nilfs->ns_sbsize);

		/*
		 * Zero fill after copy to avoid overwriting in case of move
		 * within the same block.
		 */
		memset(new_bh->b_data, 0, in_blk_off);
		memset((void *)new_sbp + nilfs->ns_sbsize, 0,
		       new_bh->b_size - in_blk_off - nilfs->ns_sbsize);
	} else {
		memset(new_bh->b_data, 0, new_bh->b_size);
	}
	set_buffer_uptodate(new_bh);
	unlock_buffer(new_bh);

	if (idx >= 0) {
		brelse(nilfs->ns_sbh[idx]);
		nilfs->ns_sbh[idx] = new_bh;
		nilfs->ns_sbp[idx] = new_sbp;
	} else if (nilfs->ns_sbh[0]->b_blocknr < nilfs->ns_first_data_block) {
		/* secondary super block will be restored to index 1 */
		nilfs->ns_sbh[1] = new_bh;
		nilfs->ns_sbp[1] = new_sbp;
	} else {
		brelse(new_bh);
	}

	return 0;
}
2011-05-05 01:23:58 +09:00
/**
 * nilfs_resize_fs - resize the filesystem
 * @sb: super block instance
 * @newsize: new size of the filesystem (in bytes)
 *
 * Resizes the segment usage file to the segment count implied by
 * @newsize, writes a log, moves the secondary super block to its new
 * position and commits both super blocks with the new device size and
 * segment count.  The allocatable segment range is updated last.
 *
 * Return: 0 on success, -ERANGE if @newsize exceeds the device size,
 * -ENOSPC if @newsize is below 4096 bytes, -EIO if no usable super block
 * is available, or an error returned by nilfs_sufile_resize(),
 * nilfs_construct_segment() or nilfs_commit_super().
 */
int nilfs_resize_fs(struct super_block *sb, __u64 newsize)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_super_block **sbp;
	__u64 devsize, newnsegs;
	loff_t sb2off;
	int ret;

	devsize = bdev_nr_bytes(sb->s_bdev);
	if (newsize > devsize)
		return -ERANGE;

	/*
	 * Prevent underflow in second superblock position calculation.
	 * The exact minimum size check is done in nilfs_sufile_resize().
	 */
	if (newsize < 4096)
		return -ENOSPC;

	/*
	 * Write lock is required to protect some functions depending
	 * on the number of segments, the number of reserved segments,
	 * and so forth.
	 */
	down_write(&nilfs->ns_segctor_sem);

	sb2off = NILFS_SB2_OFFSET_BYTES(newsize);
	newnsegs = div64_ul(sb2off >> nilfs->ns_blocksize_bits,
			    nilfs->ns_blocks_per_segment);

	ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs);
	up_write(&nilfs->ns_segctor_sem);
	if (ret < 0)
		return ret;

	ret = nilfs_construct_segment(sb);
	if (ret < 0)
		return ret;

	down_write(&nilfs->ns_sem);
	nilfs_move_2nd_super(sb, sb2off);

	sbp = nilfs_prepare_super(sb, 0);
	if (likely(sbp)) {
		nilfs_set_log_cursor(sbp[0], nilfs);
		/*
		 * Drop NILFS_RESIZE_FS flag for compatibility with
		 * mount-time resize which may be implemented in a
		 * future release.
		 */
		sbp[0]->s_state = cpu_to_le16(le16_to_cpu(sbp[0]->s_state) &
					      ~NILFS_RESIZE_FS);
		sbp[0]->s_dev_size = cpu_to_le64(newsize);
		sbp[0]->s_nsegments = cpu_to_le64(nilfs->ns_nsegments);
		if (sbp[1])
			memcpy(sbp[1], sbp[0], nilfs->ns_sbsize);
		ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL);
	} else {
		ret = -EIO;
	}
	up_write(&nilfs->ns_sem);

	/*
	 * Reset the range of allocatable segments last.  This order
	 * is important in the case of expansion because the secondary
	 * superblock must be protected from log write until migration
	 * completes.
	 */
	if (!ret)
		nilfs_sufile_set_alloc_range(nilfs->ns_sufile, 0,
					     newnsegs - 1);
	return ret;
}
2009-04-06 19:01:35 -07:00
static void nilfs_put_super ( struct super_block * sb )
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2009-04-06 19:01:35 -07:00
2011-03-09 11:05:08 +09:00
nilfs_detach_log_writer ( sb ) ;
2009-04-06 19:01:35 -07:00
2017-07-17 08:45:34 +01:00
if ( ! sb_rdonly ( sb ) ) {
2009-04-06 19:01:35 -07:00
down_write ( & nilfs - > ns_sem ) ;
2011-03-09 11:05:08 +09:00
nilfs_cleanup_super ( sb ) ;
2009-04-06 19:01:35 -07:00
up_write ( & nilfs - > ns_sem ) ;
}
nilfs2: fix sysfs interface lifetime
The current nilfs2 sysfs support has issues with the timing of creation
and deletion of sysfs entries, potentially leading to null pointer
dereferences, use-after-free, and lockdep warnings.
Some of the sysfs attributes for nilfs2 per-filesystem instance refer to
metadata file "cpfile", "sufile", or "dat", but
nilfs_sysfs_create_device_group that creates those attributes is executed
before the inodes for these metadata files are loaded, and
nilfs_sysfs_delete_device_group which deletes these sysfs entries is
called after releasing their metadata file inodes.
Therefore, access to some of these sysfs attributes may occur outside of
the lifetime of these metadata files, resulting in inode NULL pointer
dereferences or use-after-free.
In addition, the call to nilfs_sysfs_create_device_group() is made during
the locking period of the semaphore "ns_sem" of nilfs object, so the
shrinker call caused by the memory allocation for the sysfs entries, may
derive lock dependencies "ns_sem" -> (shrinker) -> "locks acquired in
nilfs_evict_inode()".
Since nilfs2 may acquire "ns_sem" deep in the call stack holding other
locks via its error handler __nilfs_error(), this causes lockdep to report
circular locking. This is a false positive and no circular locking
actually occurs as no inodes exist yet when
nilfs_sysfs_create_device_group() is called. Fortunately, the lockdep
warnings can be resolved by simply moving the call to
nilfs_sysfs_create_device_group() out of "ns_sem".
This fixes these sysfs issues by revising where the device's sysfs
interface is created/deleted and keeping its lifetime within the lifetime
of the metadata files above.
Link: https://lkml.kernel.org/r/20230330205515.6167-1-konishi.ryusuke@gmail.com
Fixes: dd70edbde262 ("nilfs2: integrate sysfs support into driver")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: syzbot+979fa7f9c0d086fdc282@syzkaller.appspotmail.com
Link: https://lkml.kernel.org/r/0000000000003414b505f7885f7e@google.com
Reported-by: syzbot+5b7d542076d9bddc3c6a@syzkaller.appspotmail.com
Link: https://lkml.kernel.org/r/0000000000006ac86605f5f44eb9@google.com
Cc: Viacheslav Dubeyko <slava@dubeyko.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-03-31 05:55:15 +09:00
nilfs_sysfs_delete_device_group ( nilfs ) ;
2010-09-05 12:20:59 +09:00
iput ( nilfs - > ns_sufile ) ;
iput ( nilfs - > ns_cpfile ) ;
iput ( nilfs - > ns_dat ) ;
2010-09-09 02:07:56 +09:00
destroy_nilfs ( nilfs ) ;
2009-04-06 19:01:35 -07:00
sb - > s_fs_info = NULL ;
}
static int nilfs_sync_fs ( struct super_block * sb , int wait )
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-06-28 17:49:32 +09:00
struct nilfs_super_block * * sbp ;
2009-04-06 19:01:35 -07:00
int err = 0 ;
/* This function is called when super block should be written back */
if ( wait )
err = nilfs_construct_segment ( sb ) ;
2009-07-23 01:26:33 +09:00
down_write ( & nilfs - > ns_sem ) ;
2010-06-28 17:49:32 +09:00
if ( nilfs_sb_dirty ( nilfs ) ) {
2011-03-09 11:05:08 +09:00
sbp = nilfs_prepare_super ( sb , nilfs_sb_will_flip ( nilfs ) ) ;
2010-06-28 17:49:33 +09:00
if ( likely ( sbp ) ) {
nilfs_set_log_cursor ( sbp [ 0 ] , nilfs ) ;
2011-03-09 11:05:08 +09:00
nilfs_commit_super ( sb , NILFS_SB_COMMIT ) ;
2010-06-28 17:49:33 +09:00
}
2010-06-28 17:49:32 +09:00
}
2009-07-23 01:26:33 +09:00
up_write ( & nilfs - > ns_sem ) ;
2014-10-13 15:53:20 -07:00
if ( ! err )
err = nilfs_flush_device ( nilfs ) ;
2009-04-06 19:01:35 -07:00
return err ;
}
2011-03-09 11:05:08 +09:00
/*
 * nilfs_attach_checkpoint() - get a root object for a checkpoint
 * @sb:       super block instance
 * @cno:      checkpoint number whose ifile is read
 * @curr_mnt: when non-zero, the root is looked up under
 *            NILFS_CPTREE_CURRENT_CNO instead of @cno
 * @rootp:    on success, receives the root object
 *
 * Finds or creates the root object.  If it has no ifile yet, the ifile is
 * read with ns_segctor_sem held for reading; a root that already carries an
 * ifile is reused as is.  On failure the reference to the root is dropped.
 *
 * Return: 0 on success, -ENOMEM if no root object could be obtained, or
 * the error code from nilfs_ifile_read().
 */
int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt,
			    struct nilfs_root **rootp)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_root *root;
	int err;

	root = nilfs_find_or_create_root(
		nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno);
	if (!root)
		return -ENOMEM;

	/* A root that already has an ifile is an attached checkpoint. */
	if (!root->ifile) {
		down_read(&nilfs->ns_segctor_sem);
		err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size);
		up_read(&nilfs->ns_segctor_sem);

		if (unlikely(err)) {
			if (err == -EINVAL)
				nilfs_err(sb,
					  "Invalid checkpoint (checkpoint number=%llu)",
					  (unsigned long long)cno);
			nilfs_put_root(root);
			return err;
		}
	}

	*rootp = root;
	return 0;
}
2010-09-20 18:19:06 +09:00
static int nilfs_freeze ( struct super_block * sb )
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-09-20 18:19:06 +09:00
int err ;
2017-07-17 08:45:34 +01:00
if ( sb_rdonly ( sb ) )
2010-09-20 18:19:06 +09:00
return 0 ;
2009-04-06 19:01:35 -07:00
2010-09-20 18:19:06 +09:00
/* Mark super block clean */
down_write ( & nilfs - > ns_sem ) ;
2011-03-09 11:05:08 +09:00
err = nilfs_cleanup_super ( sb ) ;
2010-09-20 18:19:06 +09:00
up_write ( & nilfs - > ns_sem ) ;
2009-04-06 19:01:35 -07:00
return err ;
}
2010-09-20 18:19:06 +09:00
static int nilfs_unfreeze ( struct super_block * sb )
2009-04-06 19:01:35 -07:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2009-04-06 19:01:35 -07:00
2017-07-17 08:45:34 +01:00
if ( sb_rdonly ( sb ) )
2010-09-20 18:19:06 +09:00
return 0 ;
down_write ( & nilfs - > ns_sem ) ;
2011-03-09 11:05:08 +09:00
nilfs_setup_super ( sb , false ) ;
2010-09-20 18:19:06 +09:00
up_write ( & nilfs - > ns_sem ) ;
return 0 ;
2009-04-06 19:01:35 -07:00
}
static int nilfs_statfs ( struct dentry * dentry , struct kstatfs * buf )
{
struct super_block * sb = dentry - > d_sb ;
2015-03-17 22:25:59 +00:00
struct nilfs_root * root = NILFS_I ( d_inode ( dentry ) ) - > i_root ;
2010-08-14 14:48:32 +09:00
struct the_nilfs * nilfs = root - > nilfs ;
2009-03-26 10:16:57 +09:00
u64 id = huge_encode_dev ( sb - > s_bdev - > bd_dev ) ;
2009-04-06 19:01:35 -07:00
unsigned long long blocks ;
unsigned long overhead ;
unsigned long nrsvblocks ;
sector_t nfreeblocks ;
2013-07-03 15:08:05 -07:00
u64 nmaxinodes , nfreeinodes ;
2009-04-06 19:01:35 -07:00
int err ;
/*
* Compute all of the segment blocks
*
* The blocks before first segment and after last segment
* are excluded .
*/
blocks = nilfs - > ns_blocks_per_segment * nilfs - > ns_nsegments
- nilfs - > ns_first_data_block ;
nrsvblocks = nilfs - > ns_nrsvsegs * nilfs - > ns_blocks_per_segment ;
/*
* Compute the overhead
*
2010-03-14 03:32:40 +09:00
* When distributing meta data blocks outside segment structure ,
2009-04-06 19:01:35 -07:00
* We must count them as the overhead .
*/
overhead = 0 ;
err = nilfs_count_free_blocks ( nilfs , & nfreeblocks ) ;
if ( unlikely ( err ) )
return err ;
2013-07-03 15:08:05 -07:00
err = nilfs_ifile_count_free_inodes ( root - > ifile ,
& nmaxinodes , & nfreeinodes ) ;
if ( unlikely ( err ) ) {
2020-08-11 18:35:49 -07:00
nilfs_warn ( sb , " failed to count free inodes: err=%d " , err ) ;
2013-07-03 15:08:05 -07:00
if ( err = = - ERANGE ) {
/*
* If nilfs_palloc_count_max_entries ( ) returns
* - ERANGE error code then we simply treat
* curent inodes count as maximum possible and
* zero as free inodes value .
*/
2013-07-03 15:08:06 -07:00
nmaxinodes = atomic64_read ( & root - > inodes_count ) ;
2013-07-03 15:08:05 -07:00
nfreeinodes = 0 ;
err = 0 ;
} else
return err ;
}
2009-04-06 19:01:35 -07:00
buf - > f_type = NILFS_SUPER_MAGIC ;
buf - > f_bsize = sb - > s_blocksize ;
buf - > f_blocks = blocks - overhead ;
buf - > f_bfree = nfreeblocks ;
buf - > f_bavail = ( buf - > f_bfree > = nrsvblocks ) ?
( buf - > f_bfree - nrsvblocks ) : 0 ;
2013-07-03 15:08:05 -07:00
buf - > f_files = nmaxinodes ;
buf - > f_ffree = nfreeinodes ;
2009-04-06 19:01:35 -07:00
buf - > f_namelen = NILFS_NAME_LEN ;
2020-09-18 16:45:50 -04:00
buf - > f_fsid = u64_to_fsid ( id ) ;
2009-03-26 10:16:57 +09:00
2009-04-06 19:01:35 -07:00
return 0 ;
}
2011-12-08 21:32:45 -05:00
static int nilfs_show_options ( struct seq_file * seq , struct dentry * dentry )
2009-06-24 20:06:34 +09:00
{
2011-12-08 21:32:45 -05:00
struct super_block * sb = dentry - > d_sb ;
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2015-03-17 22:25:59 +00:00
struct nilfs_root * root = NILFS_I ( d_inode ( dentry ) ) - > i_root ;
2009-06-24 20:06:34 +09:00
2011-03-09 11:05:07 +09:00
if ( ! nilfs_test_opt ( nilfs , BARRIER ) )
2010-07-05 14:40:27 +09:00
seq_puts ( seq , " ,nobarrier " ) ;
2010-08-16 01:54:52 +09:00
if ( root - > cno ! = NILFS_CPTREE_CURRENT_CNO )
seq_printf ( seq , " ,cp=%llu " , ( unsigned long long ) root - > cno ) ;
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , ERRORS_PANIC ) )
2010-07-05 14:40:27 +09:00
seq_puts ( seq , " ,errors=panic " ) ;
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , ERRORS_CONT ) )
2010-07-05 14:40:27 +09:00
seq_puts ( seq , " ,errors=continue " ) ;
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , STRICT_ORDER ) )
2010-07-05 14:40:27 +09:00
seq_puts ( seq , " ,order=strict " ) ;
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , NORECOVERY ) )
2010-07-05 14:40:27 +09:00
seq_puts ( seq , " ,norecovery " ) ;
2011-03-09 11:05:07 +09:00
if ( nilfs_test_opt ( nilfs , DISCARD ) )
2010-07-05 14:40:27 +09:00
seq_puts ( seq , " ,discard " ) ;
2009-06-24 20:06:34 +09:00
return 0 ;
}
2009-09-21 17:01:09 -07:00
static const struct super_operations nilfs_sops = {
2009-04-06 19:01:35 -07:00
. alloc_inode = nilfs_alloc_inode ,
2019-04-15 20:23:38 -04:00
. free_inode = nilfs_free_inode ,
2009-04-06 19:01:35 -07:00
. dirty_inode = nilfs_dirty_inode ,
2010-06-07 11:55:00 -04:00
. evict_inode = nilfs_evict_inode ,
2009-04-06 19:01:35 -07:00
. put_super = nilfs_put_super ,
. sync_fs = nilfs_sync_fs ,
2010-09-20 18:19:06 +09:00
. freeze_fs = nilfs_freeze ,
. unfreeze_fs = nilfs_unfreeze ,
2009-04-06 19:01:35 -07:00
. statfs = nilfs_statfs ,
2009-06-24 20:06:34 +09:00
. show_options = nilfs_show_options
2009-04-06 19:01:35 -07:00
} ;
/* Mount option identifiers referenced by nilfs_param_spec[]. */
enum {
	Opt_err,	/* "errors" */
	Opt_barrier,	/* "barrier" (negatable) */
	Opt_snapshot,	/* "cp" */
	Opt_order,	/* "order" */
	Opt_norecovery,	/* "norecovery" */
	Opt_discard,	/* "discard" (negatable) */
};
2024-04-25 03:27:16 +09:00
/* Accepted values of the "errors" mount option and their flag bits. */
static const struct constant_table nilfs_param_err[] = {
	{ "continue",	NILFS_MOUNT_ERRORS_CONT },
	{ "panic",	NILFS_MOUNT_ERRORS_PANIC },
	{ "remount-ro",	NILFS_MOUNT_ERRORS_RO },
	{ }	/* terminator */
};
2024-04-25 03:27:16 +09:00
/*
 * Mount parameter specification handed to fs_parse() by
 * nilfs_parse_param() and exported through nilfs_fs_type.parameters.
 */
static const struct fs_parameter_spec nilfs_param_spec[] = {
	fsparam_enum	("errors",	Opt_err, nilfs_param_err),
	fsparam_flag_no	("barrier",	Opt_barrier),
	fsparam_u64	("cp",		Opt_snapshot),
	fsparam_string	("order",	Opt_order),
	fsparam_flag	("norecovery",	Opt_norecovery),
	fsparam_flag_no	("discard",	Opt_discard),
	{ }	/* terminator */
};
2016-05-23 16:23:25 -07:00
2024-04-25 03:27:16 +09:00
/*
 * Per-mount parsing state kept in fc->fs_private.
 *
 * Allocated by nilfs_init_fs_context(), filled in by nilfs_parse_param()
 * and released by nilfs_free_fc().
 */
struct nilfs_fs_context {
	/* mount option flags; copied into the_nilfs on mount and remount */
	unsigned long ns_mount_opt;
	/* checkpoint number given with "cp="; 0 when the option is absent */
	__u64 cno;
};
2009-04-06 19:01:35 -07:00
2024-04-25 03:27:16 +09:00
static int nilfs_parse_param ( struct fs_context * fc , struct fs_parameter * param )
{
struct nilfs_fs_context * nilfs = fc - > fs_private ;
int is_remount = fc - > purpose = = FS_CONTEXT_FOR_RECONFIGURE ;
struct fs_parse_result result ;
int opt ;
opt = fs_parse ( fc , nilfs_param_spec , param , & result ) ;
if ( opt < 0 )
return opt ;
switch ( opt ) {
case Opt_barrier :
if ( result . negated )
2011-03-09 11:05:07 +09:00
nilfs_clear_opt ( nilfs , BARRIER ) ;
2024-04-25 03:27:16 +09:00
else
nilfs_set_opt ( nilfs , BARRIER ) ;
break ;
case Opt_order :
if ( strcmp ( param - > string , " relaxed " ) = = 0 )
/* Ordered data semantics */
nilfs_clear_opt ( nilfs , STRICT_ORDER ) ;
else if ( strcmp ( param - > string , " strict " ) = = 0 )
/* Strict in-order semantics */
nilfs_set_opt ( nilfs , STRICT_ORDER ) ;
else
return - EINVAL ;
break ;
case Opt_err :
nilfs - > ns_mount_opt & = ~ NILFS_MOUNT_ERROR_MODE ;
nilfs - > ns_mount_opt | = result . uint_32 ;
break ;
case Opt_snapshot :
if ( is_remount ) {
struct super_block * sb = fc - > root - > d_sb ;
nilfs_err ( sb ,
" \" %s \" option is invalid for remount " ,
param - > key ) ;
return - EINVAL ;
}
if ( result . uint_64 = = 0 ) {
nilfs_err ( NULL ,
" invalid option \" cp=0 \" : invalid checkpoint number 0 " ) ;
return - EINVAL ;
2009-04-06 19:01:35 -07:00
}
2024-04-25 03:27:16 +09:00
nilfs - > cno = result . uint_64 ;
break ;
case Opt_norecovery :
nilfs_set_opt ( nilfs , NORECOVERY ) ;
break ;
case Opt_discard :
if ( result . negated )
nilfs_clear_opt ( nilfs , DISCARD ) ;
else
nilfs_set_opt ( nilfs , DISCARD ) ;
break ;
default :
return - EINVAL ;
2009-04-06 19:01:35 -07:00
}
2011-03-09 11:05:07 +09:00
2024-04-25 03:27:16 +09:00
return 0 ;
2009-04-06 19:01:35 -07:00
}
2011-03-09 11:05:08 +09:00
static int nilfs_setup_super ( struct super_block * sb , int is_mount )
2009-04-06 19:01:35 -07:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-06-28 17:49:32 +09:00
struct nilfs_super_block * * sbp ;
int max_mnt_count ;
int mnt_count ;
/* nilfs->ns_sem must be locked by the caller. */
2011-03-09 11:05:08 +09:00
sbp = nilfs_prepare_super ( sb , 0 ) ;
2010-06-28 17:49:32 +09:00
if ( ! sbp )
return - EIO ;
2010-09-20 18:19:06 +09:00
if ( ! is_mount )
goto skip_mount_setup ;
2010-06-28 17:49:32 +09:00
max_mnt_count = le16_to_cpu ( sbp [ 0 ] - > s_max_mnt_count ) ;
mnt_count = le16_to_cpu ( sbp [ 0 ] - > s_mnt_count ) ;
2009-04-06 19:01:35 -07:00
2009-11-19 16:58:40 +09:00
if ( nilfs - > ns_mount_state & NILFS_ERROR_FS ) {
2020-08-11 18:35:49 -07:00
nilfs_warn ( sb , " mounting fs with errors " ) ;
2009-04-06 19:01:35 -07:00
#if 0
} else if ( max_mnt_count > = 0 & & mnt_count > = max_mnt_count ) {
2020-08-11 18:35:49 -07:00
nilfs_warn ( sb , " maximal mount count reached " ) ;
2009-04-06 19:01:35 -07:00
# endif
}
if ( ! max_mnt_count )
2010-06-28 17:49:32 +09:00
sbp [ 0 ] - > s_max_mnt_count = cpu_to_le16 ( NILFS_DFL_MAX_MNT_COUNT ) ;
2009-04-06 19:01:35 -07:00
2010-06-28 17:49:32 +09:00
sbp [ 0 ] - > s_mnt_count = cpu_to_le16 ( mnt_count + 1 ) ;
2018-08-21 21:59:05 -07:00
sbp [ 0 ] - > s_mtime = cpu_to_le64 ( ktime_get_real_seconds ( ) ) ;
2010-09-20 18:19:06 +09:00
skip_mount_setup :
2010-06-28 17:49:32 +09:00
sbp [ 0 ] - > s_state =
cpu_to_le16 ( le16_to_cpu ( sbp [ 0 ] - > s_state ) & ~ NILFS_VALID_FS ) ;
2010-06-28 17:49:33 +09:00
/* synchronize sbp[1] with sbp[0] */
2011-01-21 16:40:31 +09:00
if ( sbp [ 1 ] )
memcpy ( sbp [ 1 ] , sbp [ 0 ] , nilfs - > ns_sbsize ) ;
2011-03-09 11:05:08 +09:00
return nilfs_commit_super ( sb , NILFS_SB_COMMIT_ALL ) ;
2009-04-06 19:01:35 -07:00
}
2009-04-06 19:01:59 -07:00
/*
 * nilfs_read_super_block() - read the block holding a super block
 * @sb:        super block instance
 * @pos:       byte position of the on-disk super block
 * @blocksize: block size used to split @pos into block index and offset
 * @pbh:       receives the buffer head returned by sb_bread() (NULL on
 *             failure)
 *
 * Return: pointer to the super block inside the buffer's data, or NULL if
 * the block could not be read.
 */
struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb,
						 u64 pos, int blocksize,
						 struct buffer_head **pbh)
{
	unsigned long long blkidx = pos;
	unsigned long in_block;
	struct buffer_head *bh;

	/* do_div() leaves the quotient in blkidx and yields the remainder. */
	in_block = do_div(blkidx, blocksize);

	bh = sb_bread(sb, blkidx);
	*pbh = bh;
	if (!bh)
		return NULL;

	return (struct nilfs_super_block *)((char *)bh->b_data + in_block);
}
2024-04-25 03:27:16 +09:00
int nilfs_store_magic ( struct super_block * sb ,
struct nilfs_super_block * sbp )
2009-04-06 19:01:35 -07:00
{
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2009-04-06 19:01:35 -07:00
sb - > s_magic = le16_to_cpu ( sbp - > s_magic ) ;
/* FS independent flags */
# ifdef NILFS_ATIME_DISABLE
2017-11-27 13:05:09 -08:00
sb - > s_flags | = SB_NOATIME ;
2009-04-06 19:01:35 -07:00
# endif
2011-03-09 11:05:07 +09:00
nilfs - > ns_resuid = le16_to_cpu ( sbp - > s_def_resuid ) ;
nilfs - > ns_resgid = le16_to_cpu ( sbp - > s_def_resgid ) ;
nilfs - > ns_interval = le32_to_cpu ( sbp - > s_c_interval ) ;
nilfs - > ns_watermark = le32_to_cpu ( sbp - > s_c_block_max ) ;
2009-04-06 19:01:35 -07:00
2024-04-25 03:27:16 +09:00
return 0 ;
2009-04-06 19:01:35 -07:00
}
2010-07-22 03:22:20 +09:00
int nilfs_check_feature_compatibility ( struct super_block * sb ,
struct nilfs_super_block * sbp )
{
__u64 features ;
features = le64_to_cpu ( sbp - > s_feature_incompat ) &
~ NILFS_FEATURE_INCOMPAT_SUPP ;
if ( features ) {
2020-08-11 18:35:49 -07:00
nilfs_err ( sb ,
2016-08-02 14:05:10 -07:00
" couldn't mount because of unsupported optional features (%llx) " ,
( unsigned long long ) features ) ;
2010-07-22 03:22:20 +09:00
return - EINVAL ;
}
features = le64_to_cpu ( sbp - > s_feature_compat_ro ) &
~ NILFS_FEATURE_COMPAT_RO_SUPP ;
2017-07-17 08:45:34 +01:00
if ( ! sb_rdonly ( sb ) & & features ) {
2020-08-11 18:35:49 -07:00
nilfs_err ( sb ,
2016-08-02 14:05:10 -07:00
" couldn't mount RDWR because of unsupported optional features (%llx) " ,
( unsigned long long ) features ) ;
2010-07-22 03:22:20 +09:00
return - EINVAL ;
}
return 0 ;
}
2010-08-26 01:52:51 +09:00
static int nilfs_get_root_dentry ( struct super_block * sb ,
struct nilfs_root * root ,
struct dentry * * root_dentry )
{
struct inode * inode ;
struct dentry * dentry ;
int ret = 0 ;
inode = nilfs_iget ( sb , root , NILFS_ROOT_INO ) ;
if ( IS_ERR ( inode ) ) {
ret = PTR_ERR ( inode ) ;
2020-08-11 18:35:49 -07:00
nilfs_err ( sb , " error %d getting root inode " , ret ) ;
2010-08-26 01:52:51 +09:00
goto out ;
}
if ( ! S_ISDIR ( inode - > i_mode ) | | ! inode - > i_blocks | | ! inode - > i_size ) {
iput ( inode ) ;
2020-08-11 18:35:49 -07:00
nilfs_err ( sb , " corrupt root inode " ) ;
2010-08-26 01:52:51 +09:00
ret = - EINVAL ;
goto out ;
}
2010-08-16 01:54:52 +09:00
if ( root - > cno = = NILFS_CPTREE_CURRENT_CNO ) {
dentry = d_find_alias ( inode ) ;
if ( ! dentry ) {
2012-01-08 22:15:13 -05:00
dentry = d_make_root ( inode ) ;
2010-08-16 01:54:52 +09:00
if ( ! dentry ) {
ret = - ENOMEM ;
goto failed_dentry ;
}
} else {
iput ( inode ) ;
}
} else {
2014-02-14 17:35:37 -05:00
dentry = d_obtain_root ( inode ) ;
2010-08-16 01:54:52 +09:00
if ( IS_ERR ( dentry ) ) {
ret = PTR_ERR ( dentry ) ;
goto failed_dentry ;
}
2010-08-26 01:52:51 +09:00
}
* root_dentry = dentry ;
out :
return ret ;
2010-08-16 01:54:52 +09:00
failed_dentry :
2020-08-11 18:35:49 -07:00
nilfs_err ( sb , " error %d getting root dentry " , ret ) ;
2010-08-16 01:54:52 +09:00
goto out ;
2010-08-26 01:52:51 +09:00
}
2010-08-26 02:15:41 +09:00
/*
 * nilfs_attach_snapshot() - attach a snapshot and get its root dentry
 * @s:           super block instance
 * @cno:         checkpoint number of the snapshot
 * @root_dentry: on success, receives the root dentry of the snapshot tree
 *
 * The whole operation runs under ns_snapshot_mount_mutex.  The checkpoint
 * must be a snapshot according to the cpfile.
 *
 * Return: 0 on success; -EINVAL if the checkpoint does not exist or is not
 * a snapshot; otherwise the error from nilfs_cpfile_is_snapshot(),
 * nilfs_attach_checkpoint() or nilfs_get_root_dentry().
 */
static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
				 struct dentry **root_dentry)
{
	struct the_nilfs *nilfs = s->s_fs_info;
	struct nilfs_root *root;
	int ret;

	mutex_lock(&nilfs->ns_snapshot_mount_mutex);

	down_read(&nilfs->ns_segctor_sem);
	ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno);
	up_read(&nilfs->ns_segctor_sem);

	if (ret < 0) {
		/* A missing checkpoint is reported as an invalid argument. */
		if (ret == -ENOENT)
			ret = -EINVAL;
		goto out_unlock;
	}
	if (ret == 0) {
		nilfs_err(s,
			  "The specified checkpoint is not a snapshot (checkpoint number=%llu)",
			  (unsigned long long)cno);
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = nilfs_attach_checkpoint(s, cno, false, &root);
	if (ret) {
		nilfs_err(s,
			  "error %d while loading snapshot (checkpoint number=%llu)",
			  ret, (unsigned long long)cno);
		goto out_unlock;
	}

	ret = nilfs_get_root_dentry(s, root, root_dentry);
	nilfs_put_root(root);

out_unlock:
	mutex_unlock(&nilfs->ns_snapshot_mount_mutex);
	return ret;
}
2010-08-16 01:54:52 +09:00
/**
2013-08-24 12:06:16 -04:00
* nilfs_tree_is_busy ( ) - try to shrink dentries of a checkpoint
2010-08-16 01:54:52 +09:00
* @ root_dentry : root dentry of the tree to be shrunk
*
* This function returns true if the tree was in - use .
*/
2013-08-24 12:06:16 -04:00
static bool nilfs_tree_is_busy ( struct dentry * root_dentry )
2010-08-16 01:54:52 +09:00
{
shrink_dcache_parent ( root_dentry ) ;
2013-08-24 12:06:16 -04:00
return d_count ( root_dentry ) > 1 ;
2010-08-16 01:54:52 +09:00
}
2010-09-13 11:16:34 +09:00
/*
 * nilfs_checkpoint_is_mounted() - test whether a checkpoint is in use
 * @sb:  super block instance
 * @cno: checkpoint number
 *
 * Return: false if @cno is beyond nilfs->ns_cno; true if @cno is not older
 * than the last checkpoint (recent checkpoints are protected); otherwise
 * whether the tree rooted at the checkpoint's root inode is busy.  A
 * checkpoint with no root object, no cached root inode or no dentry alias
 * is reported as not mounted.
 */
int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_root *root;
	struct inode *inode;
	struct dentry *dentry;
	int busy = false;

	if (cno > nilfs->ns_cno)
		return false;

	if (cno >= nilfs_last_cno(nilfs))
		return true;	/* protect recent checkpoints */

	root = nilfs_lookup_root(nilfs, cno);
	if (!root)
		return false;

	inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO);
	if (inode) {
		dentry = d_find_alias(inode);
		if (dentry) {
			busy = nilfs_tree_is_busy(dentry);
			dput(dentry);
		}
		iput(inode);
	}
	nilfs_put_root(root);

	return busy;
}
2009-04-06 19:01:35 -07:00
/**
* nilfs_fill_super ( ) - initialize a super block instance
* @ sb : super_block
2024-04-25 03:27:16 +09:00
* @ fc : filesystem context
2009-04-06 19:01:35 -07:00
*
2009-06-08 01:39:33 +09:00
* This function is called exclusively by nilfs - > ns_mount_mutex .
2009-04-06 19:01:35 -07:00
* So , the recovery process is protected from other simultaneous mounts .
*/
static int
2024-04-25 03:27:16 +09:00
nilfs_fill_super ( struct super_block * sb , struct fs_context * fc )
2009-04-06 19:01:35 -07:00
{
2010-09-09 02:07:56 +09:00
struct the_nilfs * nilfs ;
2010-08-25 17:45:44 +09:00
struct nilfs_root * fsroot ;
2024-04-25 03:27:16 +09:00
struct nilfs_fs_context * ctx = fc - > fs_private ;
2009-04-06 19:01:35 -07:00
__u64 cno ;
int err ;
2016-08-02 14:05:06 -07:00
nilfs = alloc_nilfs ( sb ) ;
2011-03-09 11:05:08 +09:00
if ( ! nilfs )
2009-04-06 19:01:35 -07:00
return - ENOMEM ;
2011-03-09 11:05:08 +09:00
sb - > s_fs_info = nilfs ;
2009-04-06 19:01:35 -07:00
2024-04-25 03:27:16 +09:00
err = init_nilfs ( nilfs , sb ) ;
2009-04-06 19:01:35 -07:00
if ( err )
2010-09-09 02:07:56 +09:00
goto failed_nilfs ;
2009-04-06 19:01:35 -07:00
2024-04-25 03:27:16 +09:00
/* Copy in parsed mount options */
nilfs - > ns_mount_opt = ctx - > ns_mount_opt ;
2009-04-06 19:01:35 -07:00
sb - > s_op = & nilfs_sops ;
sb - > s_export_op = & nilfs_export_ops ;
sb - > s_root = NULL ;
2009-04-06 19:02:00 -07:00
sb - > s_time_gran = 1 ;
2012-02-06 12:45:27 -05:00
sb - > s_max_links = NILFS_LINK_MAX ;
2010-10-07 14:19:48 +09:00
2021-08-09 16:17:44 +02:00
sb - > s_bdi = bdi_get ( sb - > s_bdev - > bd_disk - > bdi ) ;
2009-04-06 19:01:35 -07:00
2011-03-09 11:05:08 +09:00
err = load_nilfs ( nilfs , sb ) ;
2009-11-19 16:58:40 +09:00
if ( err )
2010-09-09 02:07:56 +09:00
goto failed_nilfs ;
2009-11-19 16:58:40 +09:00
2009-04-06 19:01:35 -07:00
cno = nilfs_last_cno ( nilfs ) ;
2011-03-09 11:05:08 +09:00
err = nilfs_attach_checkpoint ( sb , cno , true , & fsroot ) ;
2009-04-06 19:01:35 -07:00
if ( err ) {
2020-08-11 18:35:49 -07:00
nilfs_err ( sb ,
2016-08-02 14:05:10 -07:00
" error %d while loading last checkpoint (checkpoint number=%llu) " ,
err , ( unsigned long long ) cno ) ;
2010-09-05 12:20:59 +09:00
goto failed_unload ;
2009-04-06 19:01:35 -07:00
}
2017-07-17 08:45:34 +01:00
if ( ! sb_rdonly ( sb ) ) {
2011-03-09 11:05:08 +09:00
err = nilfs_attach_log_writer ( sb , fsroot ) ;
2009-04-06 19:01:35 -07:00
if ( err )
goto failed_checkpoint ;
}
2010-08-26 01:52:51 +09:00
err = nilfs_get_root_dentry ( sb , fsroot , & sb - > s_root ) ;
if ( err )
2009-04-06 19:01:35 -07:00
goto failed_segctor ;
2010-08-25 17:45:44 +09:00
nilfs_put_root ( fsroot ) ;
2009-04-06 19:01:35 -07:00
2017-07-17 08:45:34 +01:00
if ( ! sb_rdonly ( sb ) ) {
2009-04-06 19:01:35 -07:00
down_write ( & nilfs - > ns_sem ) ;
2011-03-09 11:05:08 +09:00
nilfs_setup_super ( sb , true ) ;
2009-04-06 19:01:35 -07:00
up_write ( & nilfs - > ns_sem ) ;
}
return 0 ;
failed_segctor :
2011-03-09 11:05:08 +09:00
nilfs_detach_log_writer ( sb ) ;
2009-04-06 19:01:35 -07:00
failed_checkpoint :
2010-08-25 17:45:44 +09:00
nilfs_put_root ( fsroot ) ;
2009-04-06 19:01:35 -07:00
2010-09-05 12:20:59 +09:00
failed_unload :
nilfs2: fix sysfs interface lifetime
The current nilfs2 sysfs support has issues with the timing of creation
and deletion of sysfs entries, potentially leading to null pointer
dereferences, use-after-free, and lockdep warnings.
Some of the sysfs attributes for nilfs2 per-filesystem instance refer to
metadata file "cpfile", "sufile", or "dat", but
nilfs_sysfs_create_device_group that creates those attributes is executed
before the inodes for these metadata files are loaded, and
nilfs_sysfs_delete_device_group which deletes these sysfs entries is
called after releasing their metadata file inodes.
Therefore, access to some of these sysfs attributes may occur outside of
the lifetime of these metadata files, resulting in inode NULL pointer
dereferences or use-after-free.
In addition, the call to nilfs_sysfs_create_device_group() is made during
the locking period of the semaphore "ns_sem" of nilfs object, so the
shrinker call caused by the memory allocation for the sysfs entries, may
derive lock dependencies "ns_sem" -> (shrinker) -> "locks acquired in
nilfs_evict_inode()".
Since nilfs2 may acquire "ns_sem" deep in the call stack holding other
locks via its error handler __nilfs_error(), this causes lockdep to report
circular locking. This is a false positive and no circular locking
actually occurs as no inodes exist yet when
nilfs_sysfs_create_device_group() is called. Fortunately, the lockdep
warnings can be resolved by simply moving the call to
nilfs_sysfs_create_device_group() out of "ns_sem".
This fixes these sysfs issues by revising where the device's sysfs
interface is created/deleted and keeping its lifetime within the lifetime
of the metadata files above.
Link: https://lkml.kernel.org/r/20230330205515.6167-1-konishi.ryusuke@gmail.com
Fixes: dd70edbde262 ("nilfs2: integrate sysfs support into driver")
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
Reported-by: syzbot+979fa7f9c0d086fdc282@syzkaller.appspotmail.com
Link: https://lkml.kernel.org/r/0000000000003414b505f7885f7e@google.com
Reported-by: syzbot+5b7d542076d9bddc3c6a@syzkaller.appspotmail.com
Link: https://lkml.kernel.org/r/0000000000006ac86605f5f44eb9@google.com
Cc: Viacheslav Dubeyko <slava@dubeyko.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-03-31 05:55:15 +09:00
nilfs_sysfs_delete_device_group ( nilfs ) ;
2010-09-05 12:20:59 +09:00
iput ( nilfs - > ns_sufile ) ;
iput ( nilfs - > ns_cpfile ) ;
iput ( nilfs - > ns_dat ) ;
2010-09-09 02:07:56 +09:00
failed_nilfs :
destroy_nilfs ( nilfs ) ;
2009-04-06 19:01:35 -07:00
return err ;
}
2024-04-25 03:27:16 +09:00
static int nilfs_reconfigure ( struct fs_context * fc )
2009-04-06 19:01:35 -07:00
{
2024-04-25 03:27:16 +09:00
struct nilfs_fs_context * ctx = fc - > fs_private ;
struct super_block * sb = fc - > root - > d_sb ;
2011-03-09 11:05:08 +09:00
struct the_nilfs * nilfs = sb - > s_fs_info ;
2010-08-16 01:54:52 +09:00
int err ;
2009-04-06 19:01:35 -07:00
2014-03-13 10:14:33 -04:00
sync_filesystem ( sb ) ;
2009-04-06 19:01:35 -07:00
2010-05-09 21:51:53 +09:00
err = - EINVAL ;
2009-04-06 19:01:35 -07:00
2009-11-20 03:28:01 +09:00
if ( ! nilfs_valid_fs ( nilfs ) ) {
2020-08-11 18:35:49 -07:00
nilfs_warn ( sb ,
" couldn't remount because the filesystem is in an incomplete recovery state " ) ;
2024-04-25 03:27:16 +09:00
goto ignore_opts ;
2009-11-20 03:28:01 +09:00
}
2024-04-25 03:27:16 +09:00
if ( ( bool ) ( fc - > sb_flags & SB_RDONLY ) = = sb_rdonly ( sb ) )
2009-04-06 19:01:35 -07:00
goto out ;
2024-04-25 03:27:16 +09:00
if ( fc - > sb_flags & SB_RDONLY ) {
2017-11-27 13:05:09 -08:00
sb - > s_flags | = SB_RDONLY ;
2009-04-06 19:01:35 -07:00
/*
* Remounting a valid RW partition RDONLY , so set
* the RDONLY flag and then mark the partition as valid again .
*/
down_write ( & nilfs - > ns_sem ) ;
2011-03-09 11:05:08 +09:00
nilfs_cleanup_super ( sb ) ;
2009-04-06 19:01:35 -07:00
up_write ( & nilfs - > ns_sem ) ;
} else {
2010-07-22 03:22:20 +09:00
__u64 features ;
2010-08-14 13:07:15 +09:00
struct nilfs_root * root ;
2010-07-22 03:22:20 +09:00
2009-04-06 19:01:35 -07:00
/*
* Mounting a RDONLY partition read - write , so reread and
* store the current valid flag . ( It may have been changed
* by fsck since we originally mounted the partition . )
*/
2010-07-22 03:22:20 +09:00
down_read ( & nilfs - > ns_sem ) ;
features = le64_to_cpu ( nilfs - > ns_sbp [ 0 ] - > s_feature_compat_ro ) &
~ NILFS_FEATURE_COMPAT_RO_SUPP ;
up_read ( & nilfs - > ns_sem ) ;
if ( features ) {
2020-08-11 18:35:49 -07:00
nilfs_warn ( sb ,
" couldn't remount RDWR because of unsupported optional features (%llx) " ,
( unsigned long long ) features ) ;
2010-07-22 03:22:20 +09:00
err = - EROFS ;
2024-04-25 03:27:16 +09:00
goto ignore_opts ;
2010-07-22 03:22:20 +09:00
}
2017-11-27 13:05:09 -08:00
sb - > s_flags & = ~ SB_RDONLY ;
2009-04-06 19:01:35 -07:00
2015-03-17 22:25:59 +00:00
root = NILFS_I ( d_inode ( sb - > s_root ) ) - > i_root ;
2011-03-09 11:05:08 +09:00
err = nilfs_attach_log_writer ( sb , root ) ;
2024-04-25 03:27:16 +09:00
if ( err ) {
sb - > s_flags | = SB_RDONLY ;
goto ignore_opts ;
}
2009-04-06 19:01:35 -07:00
down_write ( & nilfs - > ns_sem ) ;
2011-03-09 11:05:08 +09:00
nilfs_setup_super ( sb , true ) ;
2009-04-06 19:01:35 -07:00
up_write ( & nilfs - > ns_sem ) ;
}
out :
2024-04-25 03:27:16 +09:00
sb - > s_flags = ( sb - > s_flags & ~ SB_POSIXACL ) ;
/* Copy over parsed remount options */
nilfs - > ns_mount_opt = ctx - > ns_mount_opt ;
2009-04-06 19:01:35 -07:00
return 0 ;
2024-04-25 03:27:16 +09:00
ignore_opts :
return err ;
2009-04-06 19:01:35 -07:00
}
2024-04-25 03:27:16 +09:00
static int
nilfs_get_tree ( struct fs_context * fc )
2009-04-06 19:01:35 -07:00
{
2024-04-25 03:27:16 +09:00
struct nilfs_fs_context * ctx = fc - > fs_private ;
2009-06-08 01:39:29 +09:00
struct super_block * s ;
2023-08-02 17:41:21 +02:00
dev_t dev ;
int err ;
2009-04-06 19:01:35 -07:00
2024-04-25 03:27:16 +09:00
if ( ctx - > cno & & ! ( fc - > sb_flags & SB_RDONLY ) ) {
nilfs_err ( NULL ,
" invalid option \" cp=%llu \" : read-only option is not specified " ,
ctx - > cno ) ;
return - EINVAL ;
}
2009-04-06 19:01:35 -07:00
2024-04-25 03:27:16 +09:00
err = lookup_bdev ( fc - > source , & dev ) ;
2023-08-02 17:41:21 +02:00
if ( err )
2024-04-25 03:27:16 +09:00
return err ;
2009-04-06 19:01:35 -07:00
2024-04-25 03:27:16 +09:00
s = sget_dev ( fc , dev ) ;
2023-08-02 17:41:21 +02:00
if ( IS_ERR ( s ) )
2024-04-25 03:27:16 +09:00
return PTR_ERR ( s ) ;
2009-04-06 19:01:35 -07:00
if ( ! s - > s_root ) {
2024-04-25 03:27:16 +09:00
err = setup_bdev_super ( s , fc - > sb_flags , fc ) ;
2023-08-02 17:41:21 +02:00
if ( ! err )
2024-04-25 03:27:16 +09:00
err = nilfs_fill_super ( s , fc ) ;
2009-04-06 19:01:35 -07:00
if ( err )
2010-09-09 02:07:56 +09:00
goto failed_super ;
2009-04-06 19:01:35 -07:00
2017-11-27 13:05:09 -08:00
s - > s_flags | = SB_ACTIVE ;
2024-04-25 03:27:16 +09:00
} else if ( ! ctx - > cno ) {
2013-08-24 12:06:16 -04:00
if ( nilfs_tree_is_busy ( s - > s_root ) ) {
2024-04-25 03:27:16 +09:00
if ( ( fc - > sb_flags ^ s - > s_flags ) & SB_RDONLY ) {
2020-08-11 18:35:49 -07:00
nilfs_err ( s ,
2016-08-02 14:05:10 -07:00
" the device already has a %s mount. " ,
2017-07-17 08:45:34 +01:00
sb_rdonly ( s ) ? " read-only " : " read/write " ) ;
2010-08-16 01:54:52 +09:00
err = - EBUSY ;
goto failed_super ;
}
2013-08-24 12:06:16 -04:00
} else {
2010-08-16 01:54:52 +09:00
/*
2024-04-25 03:27:16 +09:00
* Try reconfigure to setup mount states if the current
2010-08-16 01:54:52 +09:00
* tree is not mounted and only snapshots use this sb .
2024-04-25 03:27:16 +09:00
*
* Since nilfs_reconfigure ( ) requires fc - > root to be
* set , set it first and release it on failure .
2010-08-16 01:54:52 +09:00
*/
2024-04-25 03:27:16 +09:00
fc - > root = dget ( s - > s_root ) ;
err = nilfs_reconfigure ( fc ) ;
if ( err ) {
dput ( fc - > root ) ;
fc - > root = NULL ; /* prevent double release */
2010-08-16 01:54:52 +09:00
goto failed_super ;
2024-04-25 03:27:16 +09:00
}
return 0 ;
2010-08-16 01:54:52 +09:00
}
2009-04-06 19:01:35 -07:00
}
2024-04-25 03:27:16 +09:00
if ( ctx - > cno ) {
2023-08-02 17:41:21 +02:00
struct dentry * root_dentry ;
2024-04-25 03:27:16 +09:00
err = nilfs_attach_snapshot ( s , ctx - > cno , & root_dentry ) ;
2010-09-09 02:07:56 +09:00
if ( err )
2010-08-16 01:54:52 +09:00
goto failed_super ;
2024-04-25 03:27:16 +09:00
fc - > root = root_dentry ;
return 0 ;
2009-04-06 19:01:35 -07:00
}
2024-04-25 03:27:16 +09:00
fc - > root = dget ( s - > s_root ) ;
return 0 ;
2009-04-06 19:01:35 -07:00
2010-08-16 01:54:52 +09:00
failed_super :
2009-08-09 00:52:02 +04:00
deactivate_locked_super ( s ) ;
2024-04-25 03:27:16 +09:00
return err ;
}
static void nilfs_free_fc ( struct fs_context * fc )
{
kfree ( fc - > fs_private ) ;
}
/* Filesystem context operations; installed by nilfs_init_fs_context(). */
static const struct fs_context_operations nilfs_context_ops = {
	.parse_param	= nilfs_parse_param,
	.get_tree	= nilfs_get_tree,
	.reconfigure	= nilfs_reconfigure,
	.free		= nilfs_free_fc,
};
static int nilfs_init_fs_context ( struct fs_context * fc )
{
struct nilfs_fs_context * ctx ;
ctx = kzalloc ( sizeof ( * ctx ) , GFP_KERNEL ) ;
if ( ! ctx )
return - ENOMEM ;
ctx - > ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER ;
fc - > fs_private = ctx ;
fc - > ops = & nilfs_context_ops ;
return 0 ;
2009-04-06 19:01:35 -07:00
}
struct file_system_type nilfs_fs_type = {
. owner = THIS_MODULE ,
. name = " nilfs2 " ,
. kill_sb = kill_block_super ,
. fs_flags = FS_REQUIRES_DEV ,
2024-04-25 03:27:16 +09:00
. init_fs_context = nilfs_init_fs_context ,
. parameters = nilfs_param_spec ,
2009-04-06 19:01:35 -07:00
} ;
2013-03-02 19:39:14 -08:00
MODULE_ALIAS_FS ( " nilfs2 " ) ;
2009-04-06 19:01:35 -07:00
2010-04-06 00:54:11 +08:00
static void nilfs_inode_init_once ( void * obj )
2009-04-06 19:01:35 -07:00
{
2010-04-06 00:54:11 +08:00
struct nilfs_inode_info * ii = obj ;
2009-04-06 19:01:35 -07:00
2010-04-06 00:54:11 +08:00
INIT_LIST_HEAD ( & ii - > i_dirty ) ;
# ifdef CONFIG_NILFS_XATTR
init_rwsem ( & ii - > xattr_sem ) ;
# endif
inode_init_once ( & ii - > vfs_inode ) ;
}
2009-04-06 19:01:35 -07:00
2010-04-06 00:54:11 +08:00
/**
 * nilfs_segbuf_init_once - slab constructor for nilfs_segbuf_cachep objects
 * @obj: object memory, treated as a struct nilfs_segment_buffer
 *
 * Passed to kmem_cache_create() in nilfs_init_cachep().  Zero-fills the
 * whole segment buffer structure.
 */
static void nilfs_segbuf_init_once(void *obj)
{
	struct nilfs_segment_buffer *segbuf = obj;

	memset(segbuf, 0, sizeof(*segbuf));
}
2009-04-06 19:01:35 -07:00
2010-04-06 00:54:11 +08:00
static void nilfs_destroy_cachep ( void )
{
2012-09-26 11:33:07 +10:00
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache .
*/
rcu_barrier ( ) ;
2015-11-06 16:31:40 -08:00
kmem_cache_destroy ( nilfs_inode_cachep ) ;
kmem_cache_destroy ( nilfs_transaction_cachep ) ;
kmem_cache_destroy ( nilfs_segbuf_cachep ) ;
kmem_cache_destroy ( nilfs_btree_path_cache ) ;
2010-04-06 00:54:11 +08:00
}
2009-04-06 19:01:35 -07:00
2010-04-06 00:54:11 +08:00
/**
 * nilfs_init_cachep - create the slab caches used by nilfs2
 *
 * Creates, in order, the inode, transaction-info, segment-buffer and
 * B-tree path caches.  If any creation fails, every cache created so far
 * is torn down through nilfs_destroy_cachep().
 *
 * Return: 0 on success, -ENOMEM if any cache could not be created.
 */
static int __init nilfs_init_cachep(void)
{
	nilfs_inode_cachep =
		kmem_cache_create(" nilfs2_inode_cache ",
				  sizeof(struct nilfs_inode_info), 0,
				  SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
				  nilfs_inode_init_once);
	if (!nilfs_inode_cachep)
		goto out_destroy;

	nilfs_transaction_cachep =
		kmem_cache_create(" nilfs2_transaction_cache ",
				  sizeof(struct nilfs_transaction_info), 0,
				  SLAB_RECLAIM_ACCOUNT, NULL);
	if (!nilfs_transaction_cachep)
		goto out_destroy;

	nilfs_segbuf_cachep =
		kmem_cache_create(" nilfs2_segbuf_cache ",
				  sizeof(struct nilfs_segment_buffer), 0,
				  SLAB_RECLAIM_ACCOUNT,
				  nilfs_segbuf_init_once);
	if (!nilfs_segbuf_cachep)
		goto out_destroy;

	/* One object holds a path entry for every possible B-tree level. */
	nilfs_btree_path_cache =
		kmem_cache_create(" nilfs2_btree_path_cache ",
				  sizeof(struct nilfs_btree_path) *
						NILFS_BTREE_LEVEL_MAX,
				  0, 0, NULL);
	if (!nilfs_btree_path_cache)
		goto out_destroy;

	return 0;

out_destroy:
	nilfs_destroy_cachep();
	return -ENOMEM;
}
/**
 * init_nilfs_fs - module initialization
 *
 * Sets up the slab caches, then the sysfs entries, then registers the
 * nilfs2 filesystem type.  A failure at any step undoes the steps that
 * already succeeded, in reverse order.
 *
 * Return: 0 on success, or the error code of the step that failed.
 */
static int __init init_nilfs_fs(void)
{
	int ret = nilfs_init_cachep();

	if (ret)
		return ret;

	ret = nilfs_sysfs_init();
	if (ret)
		goto out_caches;

	ret = register_filesystem(&nilfs_fs_type);
	if (ret)
		goto out_sysfs;

	printk(KERN_INFO " NILFS version 2 loaded \n ");
	return 0;

out_sysfs:
	nilfs_sysfs_exit();
out_caches:
	nilfs_destroy_cachep();
	return ret;
}
/**
 * exit_nilfs_fs - module cleanup
 *
 * Undoes init_nilfs_fs() in strict reverse order: the filesystem type is
 * unregistered first, then the sysfs entries are removed, and the slab
 * caches are destroyed last.  This matches the error unwinding in
 * init_nilfs_fs() and ensures the caches are never destroyed while the
 * filesystem type is still registered.
 */
static void __exit exit_nilfs_fs(void)
{
	unregister_filesystem(&nilfs_fs_type);
	nilfs_sysfs_exit();
	nilfs_destroy_cachep();
}
/* Module entry and exit points. */
module_init ( init_nilfs_fs )
module_exit ( exit_nilfs_fs )