2018-06-06 05:42:14 +03:00
// SPDX-License-Identifier: GPL-2.0
2005-04-17 02:20:36 +04:00
/*
2006-06-19 02:40:27 +04:00
* Copyright ( c ) 2000 - 2006 Silicon Graphics , Inc .
2005-11-02 06:58:39 +03:00
* All Rights Reserved .
2005-04-17 02:20:36 +04:00
*/
2009-12-15 02:14:59 +03:00
2005-04-17 02:20:36 +04:00
# include "xfs.h"
2013-10-23 03:36:05 +04:00
# include "xfs_shared.h"
2013-08-12 14:49:26 +04:00
# include "xfs_format.h"
2013-10-23 03:50:10 +04:00
# include "xfs_log_format.h"
# include "xfs_trans_resv.h"
2005-04-17 02:20:36 +04:00
# include "xfs_sb.h"
# include "xfs_mount.h"
# include "xfs_inode.h"
2005-11-02 06:38:42 +03:00
# include "xfs_btree.h"
2005-04-17 02:20:36 +04:00
# include "xfs_bmap.h"
2013-10-23 03:51:50 +04:00
# include "xfs_alloc.h"
2007-10-11 12:11:14 +04:00
# include "xfs_fsops.h"
2013-10-23 03:50:10 +04:00
# include "xfs_trans.h"
2005-04-17 02:20:36 +04:00
# include "xfs_buf_item.h"
2013-10-23 03:50:10 +04:00
# include "xfs_log.h"
2007-11-23 08:29:32 +03:00
# include "xfs_log_priv.h"
2013-08-12 14:49:37 +04:00
# include "xfs_dir2.h"
2008-07-18 11:11:46 +04:00
# include "xfs_extfree_item.h"
# include "xfs_mru_cache.h"
# include "xfs_inode_item.h"
2012-10-08 14:56:09 +04:00
# include "xfs_icache.h"
2009-12-15 02:14:59 +03:00
# include "xfs_trace.h"
2013-06-27 10:04:53 +04:00
# include "xfs_icreate_item.h"
2013-10-23 03:51:50 +04:00
# include "xfs_filestream.h"
# include "xfs_quota.h"
2014-09-09 05:52:42 +04:00
# include "xfs_sysfs.h"
2016-03-09 00:15:14 +03:00
# include "xfs_ondisk.h"
2016-08-03 05:04:45 +03:00
# include "xfs_rmap_item.h"
2016-10-03 19:11:20 +03:00
# include "xfs_refcount_item.h"
2016-10-03 19:11:25 +03:00
# include "xfs_bmap_item.h"
2016-10-03 19:11:38 +03:00
# include "xfs_reflink.h"
2021-01-23 03:48:44 +03:00
# include "xfs_pwork.h"
2021-06-02 03:48:24 +03:00
# include "xfs_ag.h"
2021-10-13 00:11:01 +03:00
# include "xfs_defer.h"
2022-05-22 08:59:48 +03:00
# include "xfs_attr_item.h"
2022-05-27 03:33:29 +03:00
# include "xfs_xattr.h"
2022-07-14 04:47:42 +03:00
# include "xfs_iunlink_item.h"
xfs: test dir/attr hash when loading module
Back in the 6.2-rc1 days, Eric Whitney reported a fstests regression in
ext4 against generic/454. The cause of this test failure was the
unfortunate combination of setting an xattr name containing UTF8 encoded
emoji, an xattr hash function that accepted a char pointer with no
explicit signedness, signed type extension of those chars to an int, and
the 6.2 build tools maintainers deciding to mandate -funsigned-char
across the board. As a result, the ondisk extended attribute structures
written out by 6.1 and 6.2 were not the same.
This discrepancy, in fact, had been noticeable if a filesystem with such
an xattr were moved between any two architectures that don't employ the
same signedness of a raw "char" declaration. The only reason anyone
noticed is that x86 gcc defaults to signed, and no such -funsigned-char
update was made to e2fsprogs, so e2fsck immediately started reporting
data corruption.
After a day and a half of discussing how to handle this use case (xattrs
with bit 7 set anywhere in the name) without breaking existing users,
Linus merged his own patch and didn't tell the maintainer. None of the
ext4 developers realized this until AUTOSEL announced that the commit
had been backported to stable.
In the end, this problem could have been detected much earlier if there
had been any useful tests of hash function(s) in use inside ext4 to make
sure that they always produce the same outputs given the same inputs.
The XFS dirent/xattr name hash takes a uint8_t*, so I don't think it's
vulnerable to this problem. However, let's avoid all this drama by
adding our own self test to check that the da hash produces the same
outputs for a static pile of inputs on various platforms. This enables
us to fix any breakage that may result in a controlled fashion. The
buffer and test data are identical to the patches submitted to xfsprogs.
Link: https://lore.kernel.org/linux-ext4/Y8bpkm3jA3bDm3eL@debian-BULLSEYE-live-builder-AMD64/
Link: https://lore.kernel.org/linux-xfs/ZBUKCRR7xvIqPrpX@destitution/T/#md38272cc684e2c0d61494435ccbb91f022e8dee4
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-03-16 19:31:20 +03:00
# include "xfs_dahash_test.h"
2005-04-17 02:20:36 +04:00
2018-10-18 09:20:19 +03:00
# include <linux/magic.h>
2019-11-05 00:58:46 +03:00
# include <linux/fs_context.h>
# include <linux/fs_parser.h>
2005-04-17 02:20:36 +04:00
2009-09-22 04:01:09 +04:00
/*
* Forward declaration of the superblock operations table so it can be
* referenced before its initializer appears.
*/
static const struct super_operations xfs_super_operations ;
2014-09-09 05:52:42 +04:00
2014-09-29 04:46:08 +04:00
static struct kset * xfs_kset ; /* top-level xfs sysfs dir */
2014-09-09 05:52:42 +04:00
/* The debug sysfs object only exists in DEBUG builds. */
# ifdef DEBUG
static struct xfs_kobj xfs_dbg_kobj ; /* global debug sysfs attrs */
# endif
2005-04-17 02:20:36 +04:00
2021-08-06 21:05:38 +03:00
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Mounts are chained through xfs_mount.m_mount_list when CPU hotplug support
 * is configured.  The add/del helpers below serialize their list updates
 * with xfs_mount_list_lock.
 */
static LIST_HEAD(xfs_mount_list);
static DEFINE_SPINLOCK(xfs_mount_list_lock);

/* Link @mp into xfs_mount_list. */
static inline void
xfs_mount_list_add(
	struct xfs_mount	*mp)
{
	spin_lock(&xfs_mount_list_lock);
	list_add(&mp->m_mount_list, &xfs_mount_list);
	spin_unlock(&xfs_mount_list_lock);
}

/* Unlink @mp from xfs_mount_list. */
static inline void
xfs_mount_list_del(
	struct xfs_mount	*mp)
{
	spin_lock(&xfs_mount_list_lock);
	list_del(&mp->m_mount_list);
	spin_unlock(&xfs_mount_list_lock);
}
#else /* !CONFIG_HOTPLUG_CPU */
/* No list is kept without CPU hotplug, so both helpers compile to nothing. */
static inline void
xfs_mount_list_add(
	struct xfs_mount	*mp)
{
}

static inline void
xfs_mount_list_del(
	struct xfs_mount	*mp)
{
}
#endif /* CONFIG_HOTPLUG_CPU */
2020-05-04 19:02:42 +03:00
/*
 * DAX policy selected for a mount.  xfs_mount_set_dax_mode() translates a
 * value of this type into the XFS_FEAT_DAX_* bits of mp->m_features.
 */
enum xfs_dax_mode {
	XFS_DAX_INODE	= 0,	/* neither DAX feature bit is forced */
	XFS_DAX_ALWAYS	= 1,	/* XFS_FEAT_DAX_ALWAYS set, _NEVER clear */
	XFS_DAX_NEVER	= 2,	/* XFS_FEAT_DAX_NEVER set, _ALWAYS clear */
};
static void
xfs_mount_set_dax_mode (
struct xfs_mount * mp ,
enum xfs_dax_mode mode )
{
switch ( mode ) {
case XFS_DAX_INODE :
2021-08-19 04:46:52 +03:00
mp - > m_features & = ~ ( XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER ) ;
2020-05-04 19:02:42 +03:00
break ;
case XFS_DAX_ALWAYS :
2021-08-19 04:46:52 +03:00
mp - > m_features | = XFS_FEAT_DAX_ALWAYS ;
mp - > m_features & = ~ XFS_FEAT_DAX_NEVER ;
2020-05-04 19:02:42 +03:00
break ;
case XFS_DAX_NEVER :
2021-08-19 04:46:52 +03:00
mp - > m_features | = XFS_FEAT_DAX_NEVER ;
mp - > m_features & = ~ XFS_FEAT_DAX_ALWAYS ;
2020-05-04 19:02:42 +03:00
break ;
}
}
/*
* Keyword table for the enum-valued "dax" mount option : each entry pairs
* an accepted keyword string with the xfs_dax_mode value it selects . The
* empty entry terminates the table .
*/
static const struct constant_table dax_param_enums [ ] = {
{ " inode " , XFS_DAX_INODE } ,
{ " always " , XFS_DAX_ALWAYS } ,
{ " never " , XFS_DAX_NEVER } ,
{ }
} ;
2008-07-18 11:12:36 +04:00
/*
* Table driven mount option parser .
*
* One token per recognised mount option . The tokens are bound to option
* names and value types by the xfs_fs_parameters table . Opt_dax is the
* bare flag form of the dax option and Opt_dax_enum is its keyword - valued
* form .
*/
enum {
2019-10-28 18:41:42 +03:00
Opt_logbufs , Opt_logbsize , Opt_logdev , Opt_rtdev ,
2016-03-02 01:55:38 +03:00
Opt_wsync , Opt_noalign , Opt_swalloc , Opt_sunit , Opt_swidth , Opt_nouuid ,
2019-04-28 18:32:52 +03:00
Opt_grpid , Opt_nogrpid , Opt_bsdgroups , Opt_sysvgroups ,
2018-07-26 19:11:27 +03:00
Opt_allocsize , Opt_norecovery , Opt_inode64 , Opt_inode32 , Opt_ikeep ,
Opt_noikeep , Opt_largeio , Opt_nolargeio , Opt_attr2 , Opt_noattr2 ,
Opt_filestreams , Opt_quota , Opt_noquota , Opt_usrquota , Opt_grpquota ,
Opt_prjquota , Opt_uquota , Opt_gquota , Opt_pquota ,
2016-03-02 01:55:38 +03:00
Opt_uqnoenforce , Opt_gqnoenforce , Opt_pqnoenforce , Opt_qnoenforce ,
2020-05-04 19:02:42 +03:00
Opt_discard , Opt_nodiscard , Opt_dax , Opt_dax_enum ,
2008-07-18 11:12:36 +04:00
} ;
2019-09-07 14:23:15 +03:00
/*
* Mount parameter specifications . Each entry binds an option name to the
* kind of value it takes ( flag , u32 , string or enum ) and to the Opt_*
* token reported for it . The dax option is listed twice : once as a bare
* flag ( Opt_dax ) and once as an enum - valued option ( Opt_dax_enum ) whose
* keywords come from dax_param_enums . The empty entry terminates the
* table .
*/
static const struct fs_parameter_spec xfs_fs_parameters [ ] = {
2019-11-05 00:58:46 +03:00
fsparam_u32 ( " logbufs " , Opt_logbufs ) ,
fsparam_string ( " logbsize " , Opt_logbsize ) ,
fsparam_string ( " logdev " , Opt_logdev ) ,
fsparam_string ( " rtdev " , Opt_rtdev ) ,
fsparam_flag ( " wsync " , Opt_wsync ) ,
fsparam_flag ( " noalign " , Opt_noalign ) ,
fsparam_flag ( " swalloc " , Opt_swalloc ) ,
fsparam_u32 ( " sunit " , Opt_sunit ) ,
fsparam_u32 ( " swidth " , Opt_swidth ) ,
fsparam_flag ( " nouuid " , Opt_nouuid ) ,
fsparam_flag ( " grpid " , Opt_grpid ) ,
fsparam_flag ( " nogrpid " , Opt_nogrpid ) ,
fsparam_flag ( " bsdgroups " , Opt_bsdgroups ) ,
fsparam_flag ( " sysvgroups " , Opt_sysvgroups ) ,
fsparam_string ( " allocsize " , Opt_allocsize ) ,
fsparam_flag ( " norecovery " , Opt_norecovery ) ,
fsparam_flag ( " inode64 " , Opt_inode64 ) ,
fsparam_flag ( " inode32 " , Opt_inode32 ) ,
fsparam_flag ( " ikeep " , Opt_ikeep ) ,
fsparam_flag ( " noikeep " , Opt_noikeep ) ,
fsparam_flag ( " largeio " , Opt_largeio ) ,
fsparam_flag ( " nolargeio " , Opt_nolargeio ) ,
fsparam_flag ( " attr2 " , Opt_attr2 ) ,
fsparam_flag ( " noattr2 " , Opt_noattr2 ) ,
fsparam_flag ( " filestreams " , Opt_filestreams ) ,
fsparam_flag ( " quota " , Opt_quota ) ,
fsparam_flag ( " noquota " , Opt_noquota ) ,
fsparam_flag ( " usrquota " , Opt_usrquota ) ,
fsparam_flag ( " grpquota " , Opt_grpquota ) ,
fsparam_flag ( " prjquota " , Opt_prjquota ) ,
fsparam_flag ( " uquota " , Opt_uquota ) ,
fsparam_flag ( " gquota " , Opt_gquota ) ,
fsparam_flag ( " pquota " , Opt_pquota ) ,
fsparam_flag ( " uqnoenforce " , Opt_uqnoenforce ) ,
fsparam_flag ( " gqnoenforce " , Opt_gqnoenforce ) ,
fsparam_flag ( " pqnoenforce " , Opt_pqnoenforce ) ,
fsparam_flag ( " qnoenforce " , Opt_qnoenforce ) ,
fsparam_flag ( " discard " , Opt_discard ) ,
fsparam_flag ( " nodiscard " , Opt_nodiscard ) ,
fsparam_flag ( " dax " , Opt_dax ) ,
2020-05-04 19:02:42 +03:00
fsparam_enum ( " dax " , Opt_dax_enum , dax_param_enums ) ,
2019-11-05 00:58:46 +03:00
{ }
2008-07-18 11:12:36 +04:00
} ;
2007-11-23 08:29:32 +03:00
/*
 * One row of the feature-flag to option-string table used when reporting
 * mount options.  The string is only ever read, so it is const-qualified and
 * can point directly at string literals.
 */
struct proc_xfs_info {
	uint64_t	flag;	/* feature bit to test; 0 ends a table */
	const char	*str;	/* text emitted when @flag is set */
};
2019-10-28 18:41:47 +03:00
static int
xfs_fs_show_options (
struct seq_file * m ,
struct dentry * root )
2007-11-23 08:29:32 +03:00
{
static struct proc_xfs_info xfs_info_set [ ] = {
/* the few simple ones we can get from the mount struct */
2021-08-19 04:46:52 +03:00
{ XFS_FEAT_IKEEP , " ,ikeep " } ,
{ XFS_FEAT_WSYNC , " ,wsync " } ,
{ XFS_FEAT_NOALIGN , " ,noalign " } ,
{ XFS_FEAT_SWALLOC , " ,swalloc " } ,
{ XFS_FEAT_NOUUID , " ,nouuid " } ,
{ XFS_FEAT_NORECOVERY , " ,norecovery " } ,
{ XFS_FEAT_ATTR2 , " ,attr2 " } ,
{ XFS_FEAT_FILESTREAMS , " ,filestreams " } ,
{ XFS_FEAT_GRPID , " ,grpid " } ,
{ XFS_FEAT_DISCARD , " ,discard " } ,
{ XFS_FEAT_LARGE_IOSIZE , " ,largeio " } ,
{ XFS_FEAT_DAX_ALWAYS , " ,dax=always " } ,
{ XFS_FEAT_DAX_NEVER , " ,dax=never " } ,
2007-11-23 08:29:32 +03:00
{ 0 , NULL }
} ;
2019-10-28 18:41:47 +03:00
struct xfs_mount * mp = XFS_M ( root - > d_sb ) ;
2007-11-23 08:29:32 +03:00
struct proc_xfs_info * xfs_infop ;
for ( xfs_infop = xfs_info_set ; xfs_infop - > flag ; xfs_infop + + ) {
2021-08-19 04:46:52 +03:00
if ( mp - > m_features & xfs_infop - > flag )
2007-11-23 08:29:32 +03:00
seq_puts ( m , xfs_infop - > str ) ;
}
2019-10-28 18:41:47 +03:00
2021-08-19 04:46:52 +03:00
seq_printf ( m , " ,inode%d " , xfs_has_small_inums ( mp ) ? 32 : 64 ) ;
2007-11-23 08:29:32 +03:00
2021-08-19 04:46:52 +03:00
if ( xfs_has_allocsize ( mp ) )
2016-03-02 01:55:38 +03:00
seq_printf ( m , " ,allocsize=%dk " ,
2019-10-28 18:41:46 +03:00
( 1 < < mp - > m_allocsize_log ) > > 10 ) ;
2007-11-23 08:29:32 +03:00
if ( mp - > m_logbufs > 0 )
2016-03-02 01:55:38 +03:00
seq_printf ( m , " ,logbufs=%d " , mp - > m_logbufs ) ;
2007-11-23 08:29:32 +03:00
if ( mp - > m_logbsize > 0 )
2016-03-02 01:55:38 +03:00
seq_printf ( m , " ,logbsize=%dk " , mp - > m_logbsize > > 10 ) ;
2007-11-23 08:29:32 +03:00
if ( mp - > m_logname )
2016-03-02 01:55:38 +03:00
seq_show_option ( m , " logdev " , mp - > m_logname ) ;
2007-11-23 08:29:32 +03:00
if ( mp - > m_rtname )
2016-03-02 01:55:38 +03:00
seq_show_option ( m , " rtdev " , mp - > m_rtname ) ;
2007-11-23 08:29:32 +03:00
if ( mp - > m_dalign > 0 )
2016-03-02 01:55:38 +03:00
seq_printf ( m , " ,sunit=%d " ,
2007-11-23 08:29:32 +03:00
( int ) XFS_FSB_TO_BB ( mp , mp - > m_dalign ) ) ;
if ( mp - > m_swidth > 0 )
2016-03-02 01:55:38 +03:00
seq_printf ( m , " ,swidth=%d " ,
2007-11-23 08:29:32 +03:00
( int ) XFS_FSB_TO_BB ( mp , mp - > m_swidth ) ) ;
2021-08-06 21:05:37 +03:00
if ( mp - > m_qflags & XFS_UQUOTA_ENFD )
seq_puts ( m , " ,usrquota " ) ;
else if ( mp - > m_qflags & XFS_UQUOTA_ACCT )
seq_puts ( m , " ,uqnoenforce " ) ;
2007-11-23 08:29:32 +03:00
2021-08-06 21:05:37 +03:00
if ( mp - > m_qflags & XFS_PQUOTA_ENFD )
seq_puts ( m , " ,prjquota " ) ;
else if ( mp - > m_qflags & XFS_PQUOTA_ACCT )
seq_puts ( m , " ,pqnoenforce " ) ;
if ( mp - > m_qflags & XFS_GQUOTA_ENFD )
seq_puts ( m , " ,grpquota " ) ;
else if ( mp - > m_qflags & XFS_GQUOTA_ACCT )
seq_puts ( m , " ,gqnoenforce " ) ;
2007-11-23 08:29:32 +03:00
if ( ! ( mp - > m_qflags & XFS_ALL_QUOTA_ACCT ) )
2016-03-02 01:55:38 +03:00
seq_puts ( m , " ,noquota " ) ;
2019-10-28 18:41:47 +03:00
return 0 ;
2007-11-23 08:29:32 +03:00
}
2019-05-02 06:26:30 +03:00
2023-02-13 01:14:52 +03:00
/*
 * Update the inode allocation state bits of one allocation group.
 *
 * @pag:          the AG to configure
 * @ino:          inode number tested against XFS_MAXINUMBER_32
 * @max_metadata: AGs numbered below this are flagged as preferring metadata
 *
 * When the mount is not in inode32 mode the AG allows inodes and has no
 * metadata preference.  In inode32 mode an AG whose @ino exceeds
 * XFS_MAXINUMBER_32 has both bits cleared; otherwise it allows inodes and
 * prefers metadata only if its AG number is below @max_metadata.
 *
 * Returns true only for an inode32 mount when the AG may hold inodes.
 */
static bool
xfs_set_inode_alloc_perag(
	struct xfs_perag	*pag,
	xfs_ino_t		ino,
	xfs_agnumber_t		max_metadata)
{
	bool			inode32 = xfs_is_inode32(pag->pag_mount);

	if (inode32 && ino > XFS_MAXINUMBER_32) {
		clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return false;
	}

	/* Every remaining case permits inode allocation in this AG. */
	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);

	if (inode32 && pag->pag_agno < max_metadata) {
		set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
		return true;
	}

	clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
	return inode32;
}
xfs: allow inode allocations in post-growfs disk space
Today, if we perform an xfs_growfs which adds allocation groups,
mp->m_maxagi is not properly updated when the growfs is complete.
Therefore inodes will continue to be allocated only in the
AGs which existed prior to the growfs, and the new space
won't be utilized.
This is because of this path in xfs_growfs_data_private():
xfs_growfs_data_private
xfs_initialize_perag(mp, nagcount, &nagimax);
if (mp->m_flags & XFS_MOUNT_32BITINODES)
index = xfs_set_inode32(mp);
else
index = xfs_set_inode64(mp);
if (maxagi)
*maxagi = index;
where xfs_set_inode* iterates over the (old) agcount in
mp->m_sb.sb_agcount, which has not yet been updated
in the growfs path. So "index" will be returned based on
the old agcount, not the new one, and new AGs are not available
for inode allocation.
Fix this by explicitly passing the proper AG count (which
xfs_initialize_perag() already has) down another level,
so that xfs_set_inode* can make the proper decision about
acceptable AGs for inode allocation in the potentially
newly-added AGs.
This has been broken since 3.7, when these two
xfs_set_inode* functions were added in commit 2d2194f.
Prior to that, we looped over "agcount" not sb_agcount
in these calculations.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-07-24 14:51:54 +04:00
/*
2016-03-02 01:58:09 +03:00
* Set parameters for inode allocation heuristics , taking into account
* filesystem size and inode32 / inode64 mount options ; i . e . specifically
2021-08-19 04:46:52 +03:00
* whether or not XFS_FEAT_SMALL_INUMS is set .
2016-03-02 01:58:09 +03:00
*
* Inode allocation patterns are altered only if inode32 is requested
2021-08-19 04:46:52 +03:00
* ( XFS_FEAT_SMALL_INUMS ) , and the filesystem is sufficiently large .
2021-08-19 04:46:52 +03:00
* If altered , XFS_OPSTATE_INODE32 is set as well .
2016-03-02 01:58:09 +03:00
*
* An agcount independent of that in the mount structure is provided
* because in the growfs case , mp - > m_sb . sb_agcount is not yet updated
* to the potentially higher ag count .
*
* Returns the maximum AG index which may contain inodes .
xfs: allow inode allocations in post-growfs disk space
Today, if we perform an xfs_growfs which adds allocation groups,
mp->m_maxagi is not properly updated when the growfs is complete.
Therefore inodes will continue to be allocated only in the
AGs which existed prior to the growfs, and the new space
won't be utilized.
This is because of this path in xfs_growfs_data_private():
xfs_growfs_data_private
xfs_initialize_perag(mp, nagcount, &nagimax);
if (mp->m_flags & XFS_MOUNT_32BITINODES)
index = xfs_set_inode32(mp);
else
index = xfs_set_inode64(mp);
if (maxagi)
*maxagi = index;
where xfs_set_inode* iterates over the (old) agcount in
mp->m_sb.sb_agblocks, which has not yet been updated
in the growfs path. So "index" will be returned based on
the old agcount, not the new one, and new AGs are not available
for inode allocation.
Fix this by explicitly passing the proper AG count (which
xfs_initialize_perag() already has) down another level,
so that xfs_set_inode* can make the proper decision about
acceptable AGs for inode allocation in the potentially
newly-added AGs.
This has been broken since 3.7, when these two
xfs_set_inode* functions were added in commit 2d2194f.
Prior to that, we looped over "agcount" not sb_agblocks
in these calculations.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-07-24 14:51:54 +04:00
*/
2012-09-20 17:32:38 +04:00
xfs_agnumber_t
2016-03-02 01:58:09 +03:00
xfs_set_inode_alloc (
struct xfs_mount * mp ,
xfs_agnumber_t agcount )
2012-09-20 17:32:38 +04:00
{
2016-03-02 01:58:09 +03:00
xfs_agnumber_t index ;
2012-09-20 17:32:40 +04:00
xfs_agnumber_t maxagi = 0 ;
2012-09-20 17:32:38 +04:00
xfs_sb_t * sbp = & mp - > m_sb ;
xfs_agnumber_t max_metadata ;
2014-07-24 14:53:10 +04:00
xfs_agino_t agino ;
xfs_ino_t ino ;
2012-09-20 17:32:38 +04:00
2016-03-02 01:58:09 +03:00
/*
* Calculate how much should be reserved for inodes to meet
* the max inode percentage . Used only for inode32 .
2012-09-20 17:32:38 +04:00
*/
2019-06-05 21:19:34 +03:00
if ( M_IGEO ( mp ) - > maxicount ) {
2017-06-16 21:00:05 +03:00
uint64_t icount ;
2012-09-20 17:32:38 +04:00
icount = sbp - > sb_dblocks * sbp - > sb_imax_pct ;
do_div ( icount , 100 ) ;
icount + = sbp - > sb_agblocks - 1 ;
do_div ( icount , sbp - > sb_agblocks ) ;
max_metadata = icount ;
} else {
xfs: allow inode allocations in post-growfs disk space
Today, if we perform an xfs_growfs which adds allocation groups,
mp->m_maxagi is not properly updated when the growfs is complete.
Therefore inodes will continue to be allocated only in the
AGs which existed prior to the growfs, and the new space
won't be utilized.
This is because of this path in xfs_growfs_data_private():
xfs_growfs_data_private
xfs_initialize_perag(mp, nagcount, &nagimax);
if (mp->m_flags & XFS_MOUNT_32BITINODES)
index = xfs_set_inode32(mp);
else
index = xfs_set_inode64(mp);
if (maxagi)
*maxagi = index;
where xfs_set_inode* iterates over the (old) agcount in
mp->m_sb.sb_agblocks, which has not yet been updated
in the growfs path. So "index" will be returned based on
the old agcount, not the new one, and new AGs are not available
for inode allocation.
Fix this by explicitly passing the proper AG count (which
xfs_initialize_perag() already has) down another level,
so that xfs_set_inode* can make the proper decision about
acceptable AGs for inode allocation in the potentially
newly-added AGs.
This has been broken since 3.7, when these two
xfs_set_inode* functions were added in commit 2d2194f.
Prior to that, we looped over "agcount" not sb_agblocks
in these calculations.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-07-24 14:51:54 +04:00
max_metadata = agcount ;
2012-09-20 17:32:38 +04:00
}
2016-03-02 01:58:09 +03:00
/* Get the last possible inode in the filesystem */
2018-12-12 19:46:24 +03:00
agino = XFS_AGB_TO_AGINO ( mp , sbp - > sb_agblocks - 1 ) ;
2016-03-02 01:58:09 +03:00
ino = XFS_AGINO_TO_INO ( mp , agcount - 1 , agino ) ;
/*
* If user asked for no more than 32 - bit inodes , and the fs is
2021-08-19 04:46:52 +03:00
* sufficiently large , set XFS_OPSTATE_INODE32 if we must alter
2016-03-02 01:58:09 +03:00
* the allocator to accommodate the request .
*/
2021-08-19 04:46:52 +03:00
if ( xfs_has_small_inums ( mp ) & & ino > XFS_MAXINUMBER_32 )
2021-08-19 04:46:52 +03:00
set_bit ( XFS_OPSTATE_INODE32 , & mp - > m_opstate ) ;
2016-03-02 01:58:09 +03:00
else
2021-08-19 04:46:52 +03:00
clear_bit ( XFS_OPSTATE_INODE32 , & mp - > m_opstate ) ;
2014-07-24 14:53:10 +04:00
xfs: allow inode allocations in post-growfs disk space
Today, if we perform an xfs_growfs which adds allocation groups,
mp->m_maxagi is not properly updated when the growfs is complete.
Therefore inodes will continue to be allocated only in the
AGs which existed prior to the growfs, and the new space
won't be utilized.
This is because of this path in xfs_growfs_data_private():
xfs_growfs_data_private
xfs_initialize_perag(mp, nagcount, &nagimax);
if (mp->m_flags & XFS_MOUNT_32BITINODES)
index = xfs_set_inode32(mp);
else
index = xfs_set_inode64(mp);
if (maxagi)
*maxagi = index;
where xfs_set_inode* iterates over the (old) agcount in
mp->m_sb.sb_agblocks, which has not yet been updated
in the growfs path. So "index" will be returned based on
the old agcount, not the new one, and new AGs are not available
for inode allocation.
Fix this by explicitly passing the proper AG count (which
xfs_initialize_perag() already has) down another level,
so that xfs_set_inode* can make the proper decision about
acceptable AGs for inode allocation in the potentially
newly-added AGs.
This has been broken since 3.7, when these two
xfs_set_inode* functions were added in commit 2d2194f.
Prior to that, we looped over "agcount" not sb_agblocks
in these calculations.
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2014-07-24 14:51:54 +04:00
for ( index = 0 ; index < agcount ; index + + ) {
2016-03-02 01:58:09 +03:00
struct xfs_perag * pag ;
2012-09-20 17:32:40 +04:00
2016-03-02 01:58:09 +03:00
ino = XFS_AGINO_TO_INO ( mp , index , agino ) ;
2012-09-20 17:32:38 +04:00
pag = xfs_perag_get ( mp , index ) ;
2023-02-13 01:14:52 +03:00
if ( xfs_set_inode_alloc_perag ( pag , ino , max_metadata ) )
maxagi + + ;
2012-09-20 17:32:38 +04:00
xfs_perag_put ( pag ) ;
}
2021-08-19 04:46:52 +03:00
return xfs_is_inode32 ( mp ) ? maxagi : agcount ;
2012-09-20 17:32:38 +04:00
}
2021-11-29 13:21:41 +03:00
static int
xfs_setup_dax_always (
struct xfs_mount * mp )
2021-08-26 16:55:09 +03:00
{
2021-11-29 13:21:42 +03:00
if ( ! mp - > m_ddev_targp - > bt_daxdev & &
( ! mp - > m_rtdev_targp | | ! mp - > m_rtdev_targp - > bt_daxdev ) ) {
2021-11-29 13:21:41 +03:00
xfs_alert ( mp ,
" DAX unsupported by block device. Turning off DAX. " ) ;
goto disable_dax ;
2021-11-29 13:21:42 +03:00
}
if ( mp - > m_super - > s_blocksize ! = PAGE_SIZE ) {
xfs_alert ( mp ,
" DAX not supported for blocksize. Turning off DAX. " ) ;
goto disable_dax ;
2021-11-29 13:21:41 +03:00
}
2022-06-09 17:34:35 +03:00
if ( xfs_has_reflink ( mp ) & &
bdev_is_partition ( mp - > m_ddev_targp - > bt_bdev ) ) {
xfs_alert ( mp ,
" DAX and reflink cannot work with multi-partitions! " ) ;
2021-11-29 13:21:41 +03:00
return - EINVAL ;
}
xfs_warn ( mp , " DAX enabled. Warning: EXPERIMENTAL, use at your own risk " ) ;
return 0 ;
disable_dax :
xfs_mount_set_dax_mode ( mp , XFS_DAX_NEVER ) ;
return 0 ;
2021-08-26 16:55:09 +03:00
}
2009-03-04 21:34:10 +03:00
STATIC int
2005-04-17 02:20:36 +04:00
xfs_blkdev_get (
xfs_mount_t * mp ,
const char * name ,
struct block_device * * bdevp )
{
int error = 0 ;
2010-11-13 13:55:18 +03:00
* bdevp = blkdev_get_by_path ( name , FMODE_READ | FMODE_WRITE | FMODE_EXCL ,
mp ) ;
2005-04-17 02:20:36 +04:00
if ( IS_ERR ( * bdevp ) ) {
error = PTR_ERR ( * bdevp ) ;
2014-12-24 01:47:27 +03:00
xfs_warn ( mp , " Invalid device [%s], error=%d " , name , error ) ;
2005-04-17 02:20:36 +04:00
}
2014-06-25 08:58:08 +04:00
return error ;
2005-04-17 02:20:36 +04:00
}
2009-03-04 21:34:10 +03:00
STATIC void
2005-04-17 02:20:36 +04:00
xfs_blkdev_put (
struct block_device * bdev )
{
if ( bdev )
block: make blkdev_get/put() handle exclusive access
Over time, block layer has accumulated a set of APIs dealing with bdev
open, close, claim and release.
* blkdev_get/put() are the primary open and close functions.
* bd_claim/release() deal with exclusive open.
* open/close_bdev_exclusive() are combination of open and claim and
the other way around, respectively.
* bd_link/unlink_disk_holder() to create and remove holder/slave
symlinks.
* open_by_devnum() wraps bdget() + blkdev_get().
The interface is a bit confusing and the decoupling of open and claim
makes it impossible to properly guarantee exclusive access as
in-kernel open + claim sequence can disturb the existing exclusive
open even before the block layer knows the current open if for another
exclusive access. Reorganize the interface such that,
* blkdev_get() is extended to include exclusive access management.
@holder argument is added and, if is @FMODE_EXCL specified, it will
gain exclusive access atomically w.r.t. other exclusive accesses.
* blkdev_put() is similarly extended. It now takes @mode argument and
if @FMODE_EXCL is set, it releases an exclusive access. Also, when
the last exclusive claim is released, the holder/slave symlinks are
removed automatically.
* bd_claim/release() and close_bdev_exclusive() are no longer
necessary and either made static or removed.
* bd_link_disk_holder() remains the same but bd_unlink_disk_holder()
is no longer necessary and removed.
* open_bdev_exclusive() becomes a simple wrapper around lookup_bdev()
and blkdev_get(). It also has an unexpected extra bdev_read_only()
test which probably should be moved into blkdev_get().
* open_by_devnum() is modified to take @holder argument and pass it to
blkdev_get().
Most of bdev open/close operations are unified into blkdev_get/put()
and most exclusive accesses are tested atomically at the open time (as
it should). This cleans up code and removes some, both valid and
invalid, but unnecessary all the same, corner cases.
open_bdev_exclusive() and open_by_devnum() can use further cleanup -
rename to blkdev_get_by_path() and blkdev_get_by_devt() and drop
special features. Well, let's leave them for another day.
Most conversions are straight-forward. drbd conversion is a bit more
involved as there was some reordering, but the logic should stay the
same.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Neil Brown <neilb@suse.de>
Acked-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Acked-by: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Peter Osterlund <petero2@telia.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <joel.becker@oracle.com>
Cc: Alex Elder <aelder@sgi.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: dm-devel@redhat.com
Cc: drbd-dev@lists.linbit.com
Cc: Leo Chen <leochen@broadcom.com>
Cc: Scott Branden <sbranden@broadcom.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Dave Kleikamp <shaggy@linux.vnet.ibm.com>
Cc: Joern Engel <joern@logfs.org>
Cc: reiserfs-devel@vger.kernel.org
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
2010-11-13 13:55:17 +03:00
blkdev_put ( bdev , FMODE_READ | FMODE_WRITE | FMODE_EXCL ) ;
2005-04-17 02:20:36 +04:00
}
2008-05-20 05:31:13 +04:00
STATIC void
xfs_close_devices (
struct xfs_mount * mp )
{
if ( mp - > m_logdev_targp & & mp - > m_logdev_targp ! = mp - > m_ddev_targp ) {
2008-07-18 11:13:12 +04:00
struct block_device * logdev = mp - > m_logdev_targp - > bt_bdev ;
2017-08-25 01:12:50 +03:00
2018-04-06 20:09:42 +03:00
xfs_free_buftarg ( mp - > m_logdev_targp ) ;
2008-07-18 11:13:12 +04:00
xfs_blkdev_put ( logdev ) ;
2008-05-20 05:31:13 +04:00
}
if ( mp - > m_rtdev_targp ) {
2008-07-18 11:13:12 +04:00
struct block_device * rtdev = mp - > m_rtdev_targp - > bt_bdev ;
2017-08-25 01:12:50 +03:00
2018-04-06 20:09:42 +03:00
xfs_free_buftarg ( mp - > m_rtdev_targp ) ;
2008-07-18 11:13:12 +04:00
xfs_blkdev_put ( rtdev ) ;
2008-05-20 05:31:13 +04:00
}
2018-04-06 20:09:42 +03:00
xfs_free_buftarg ( mp - > m_ddev_targp ) ;
2008-05-20 05:31:13 +04:00
}
/*
* Open the optional external log and realtime devices named in the mount
* and allocate the buffer targets for the data , realtime and log devices .
*
* The file system configurations are :
* ( 1 ) device ( partition ) with data and internal log
* ( 2 ) logical volume with data and log subvolumes .
* ( 3 ) logical volume with data , log , and realtime subvolumes .
*
* We only have to handle opening the log and realtime volumes here if
* they are present . The data device is taken from mp - > m_super - > s_bdev
* and is not opened here .
* NOTE ( review ) : the previous comment credited get_sb_bdev ( ) with opening
* it ; confirm which helper does so now .
*
* Returns 0 on success . On failure returns a negative errno after undoing
* whatever was set up : - EINVAL if the realtime device is the same as the
* data or log device , - ENOMEM if a buffer target cannot be allocated , or
* the error from xfs_blkdev_get ( ) .
*/
STATIC int
xfs_open_devices (
2008-10-30 09:53:24 +03:00
struct xfs_mount * mp )
2008-05-20 05:31:13 +04:00
{
struct block_device * ddev = mp - > m_super - > s_bdev ;
struct block_device * logdev = NULL , * rtdev = NULL ;
int error ;
/*
* Open real time and log devices - order is important .
*/
2008-10-30 09:53:24 +03:00
if ( mp - > m_logname ) {
error = xfs_blkdev_get ( mp , mp - > m_logname , & logdev ) ;
2008-05-20 05:31:13 +04:00
if ( error )
2021-11-29 13:21:55 +03:00
return error ;
2008-05-20 05:31:13 +04:00
}
2008-10-30 09:53:24 +03:00
if ( mp - > m_rtname ) {
error = xfs_blkdev_get ( mp , mp - > m_rtname , & rtdev ) ;
2008-05-20 05:31:13 +04:00
if ( error )
goto out_close_logdev ;
/* The realtime device must be distinct from the data and log devices . */
if ( rtdev = = ddev | | rtdev = = logdev ) {
2011-03-07 02:00:35 +03:00
xfs_warn ( mp ,
" Cannot mount filesystem with identical rtdev and ddev/logdev. " ) ;
2014-06-25 08:58:08 +04:00
error = - EINVAL ;
2008-05-20 05:31:13 +04:00
goto out_close_rtdev ;
}
}
/*
* Setup xfs_mount buffer target pointers
*/
2014-06-25 08:58:08 +04:00
/* Every failure from here on is a buftarg allocation failure . */
error = - ENOMEM ;
2021-11-29 13:21:55 +03:00
mp - > m_ddev_targp = xfs_alloc_buftarg ( mp , ddev ) ;
2008-05-20 05:31:13 +04:00
if ( ! mp - > m_ddev_targp )
goto out_close_rtdev ;
if ( rtdev ) {
2021-11-29 13:21:55 +03:00
mp - > m_rtdev_targp = xfs_alloc_buftarg ( mp , rtdev ) ;
2008-05-20 05:31:13 +04:00
if ( ! mp - > m_rtdev_targp )
goto out_free_ddev_targ ;
}
if ( logdev & & logdev ! = ddev ) {
2021-11-29 13:21:55 +03:00
mp - > m_logdev_targp = xfs_alloc_buftarg ( mp , logdev ) ;
2008-05-20 05:31:13 +04:00
if ( ! mp - > m_logdev_targp )
goto out_free_rtdev_targ ;
} else {
/* No separate log device : the log shares the data buftarg . */
mp - > m_logdev_targp = mp - > m_ddev_targp ;
}
return 0 ;
/* Unwind in reverse order of setup ; each label falls through to the next . */
out_free_rtdev_targ :
if ( mp - > m_rtdev_targp )
2018-04-06 20:09:42 +03:00
xfs_free_buftarg ( mp - > m_rtdev_targp ) ;
2008-05-20 05:31:13 +04:00
out_free_ddev_targ :
2018-04-06 20:09:42 +03:00
xfs_free_buftarg ( mp - > m_ddev_targp ) ;
2008-05-20 05:31:13 +04:00
out_close_rtdev :
2014-12-01 00:24:20 +03:00
/* rtdev may be NULL here ; xfs_blkdev_put ( ) ignores a NULL device . */
xfs_blkdev_put ( rtdev ) ;
2008-05-20 05:31:13 +04:00
out_close_logdev :
2021-11-29 13:21:55 +03:00
if ( logdev & & logdev ! = ddev )
2008-05-20 05:31:13 +04:00
xfs_blkdev_put ( logdev ) ;
return error ;
}
2008-05-20 09:10:36 +04:00
/*
* Setup xfs_mount buffer target pointers based on superblock
*/
STATIC int
xfs_setup_devices (
struct xfs_mount * mp )
{
int error ;
2008-05-20 05:31:13 +04:00
2014-04-14 13:00:29 +04:00
error = xfs_setsize_buftarg ( mp - > m_ddev_targp , mp - > m_sb . sb_sectsize ) ;
2008-05-20 09:10:36 +04:00
if ( error )
return error ;
if ( mp - > m_logdev_targp & & mp - > m_logdev_targp ! = mp - > m_ddev_targp ) {
unsigned int log_sector_size = BBSIZE ;
2021-08-19 04:46:37 +03:00
if ( xfs_has_sector ( mp ) )
2008-05-20 09:10:36 +04:00
log_sector_size = mp - > m_sb . sb_logsectsize ;
error = xfs_setsize_buftarg ( mp - > m_logdev_targp ,
log_sector_size ) ;
if ( error )
return error ;
}
if ( mp - > m_rtdev_targp ) {
error = xfs_setsize_buftarg ( mp - > m_rtdev_targp ,
mp - > m_sb . sb_sectsize ) ;
if ( error )
return error ;
}
return 0 ;
}
2008-05-20 05:31:13 +04:00
2012-02-29 13:53:48 +04:00
/*
 * Allocate the per-mount workqueues: buf, unwritten (conv), reclaim,
 * blockgc, inodegc and sync, in that order.  Every queue name is formatted
 * with the superblock's s_id.
 *
 * If an allocation fails, the queues that were already allocated are
 * destroyed in reverse order of creation by the goto ladder at the bottom.
 *
 * Returns 0 on success, or -ENOMEM if any workqueue could not be allocated.
 */
STATIC int
xfs_init_mount_workqueues (
struct xfs_mount * mp )
{
2014-11-28 05:59:58 +03:00
mp - > m_buf_workqueue = alloc_workqueue ( " xfs-buf/%s " ,
2021-01-23 03:48:42 +03:00
XFS_WQFLAGS ( WQ_FREEZABLE | WQ_MEM_RECLAIM ) ,
1 , mp - > m_super - > s_id ) ;
2014-11-28 05:59:58 +03:00
if ( ! mp - > m_buf_workqueue )
goto out ;
2012-02-29 13:53:48 +04:00
mp - > m_unwritten_workqueue = alloc_workqueue ( " xfs-conv/%s " ,
2021-01-23 03:48:42 +03:00
XFS_WQFLAGS ( WQ_FREEZABLE | WQ_MEM_RECLAIM ) ,
0 , mp - > m_super - > s_id ) ;
2012-02-29 13:53:48 +04:00
if ( ! mp - > m_unwritten_workqueue )
2019-04-15 23:13:21 +03:00
goto out_destroy_buf ;
2012-02-29 13:53:48 +04:00
2012-10-08 14:56:05 +04:00
mp - > m_reclaim_workqueue = alloc_workqueue ( " xfs-reclaim/%s " ,
2021-01-23 03:48:42 +03:00
XFS_WQFLAGS ( WQ_FREEZABLE | WQ_MEM_RECLAIM ) ,
0 , mp - > m_super - > s_id ) ;
2012-10-08 14:56:05 +04:00
if ( ! mp - > m_reclaim_workqueue )
2021-08-11 04:00:45 +03:00
goto out_destroy_unwritten ;
2012-10-08 14:56:05 +04:00
2021-08-06 21:05:39 +03:00
/* blockgc is the only queue here that also requests WQ_UNBOUND */
mp - > m_blockgc_wq = alloc_workqueue ( " xfs-blockgc/%s " ,
XFS_WQFLAGS ( WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM ) ,
2021-01-23 03:48:42 +03:00
0 , mp - > m_super - > s_id ) ;
2021-08-06 21:05:39 +03:00
if ( ! mp - > m_blockgc_wq )
2019-06-29 05:27:25 +03:00
goto out_destroy_reclaim ;
2012-11-06 18:50:47 +04:00
2021-08-06 21:05:39 +03:00
mp - > m_inodegc_wq = alloc_workqueue ( " xfs-inodegc/%s " ,
XFS_WQFLAGS ( WQ_FREEZABLE | WQ_MEM_RECLAIM ) ,
1 , mp - > m_super - > s_id ) ;
if ( ! mp - > m_inodegc_wq )
goto out_destroy_blockgc ;
2021-01-23 03:48:42 +03:00
/* the sync queue is the only one allocated without WQ_MEM_RECLAIM */
mp - > m_sync_workqueue = alloc_workqueue ( " xfs-sync/%s " ,
XFS_WQFLAGS ( WQ_FREEZABLE ) , 0 , mp - > m_super - > s_id ) ;
2017-03-29 00:51:44 +03:00
if ( ! mp - > m_sync_workqueue )
2021-08-06 21:05:39 +03:00
goto out_destroy_inodegc ;
2017-03-29 00:51:44 +03:00
2012-02-29 13:53:48 +04:00
return 0 ;
2021-08-06 21:05:39 +03:00
/*
 * Error unwind: each label destroys the queue allocated immediately before
 * the step that jumps to it, then falls through to the labels below.
 */
out_destroy_inodegc :
destroy_workqueue ( mp - > m_inodegc_wq ) ;
out_destroy_blockgc :
destroy_workqueue ( mp - > m_blockgc_wq ) ;
2012-10-08 14:56:05 +04:00
out_destroy_reclaim :
destroy_workqueue ( mp - > m_reclaim_workqueue ) ;
2012-04-23 11:54:32 +04:00
out_destroy_unwritten :
destroy_workqueue ( mp - > m_unwritten_workqueue ) ;
2014-11-28 05:59:58 +03:00
out_destroy_buf :
destroy_workqueue ( mp - > m_buf_workqueue ) ;
2012-02-29 13:53:48 +04:00
out :
return - ENOMEM ;
}
STATIC void
xfs_destroy_mount_workqueues (
struct xfs_mount * mp )
{
2017-03-29 00:51:44 +03:00
destroy_workqueue ( mp - > m_sync_workqueue ) ;
2021-08-06 21:05:39 +03:00
destroy_workqueue ( mp - > m_blockgc_wq ) ;
destroy_workqueue ( mp - > m_inodegc_wq ) ;
2012-10-08 14:56:05 +04:00
destroy_workqueue ( mp - > m_reclaim_workqueue ) ;
2012-02-29 13:53:48 +04:00
destroy_workqueue ( mp - > m_unwritten_workqueue ) ;
2014-11-28 05:59:58 +03:00
destroy_workqueue ( mp - > m_buf_workqueue ) ;
2012-02-29 13:53:48 +04:00
}
2020-04-12 23:11:10 +03:00
static void
xfs_flush_inodes_worker (
struct work_struct * work )
{
struct xfs_mount * mp = container_of ( work , struct xfs_mount ,
m_flush_inodes_work ) ;
struct super_block * sb = mp - > m_super ;
if ( down_read_trylock ( & sb - > s_umount ) ) {
sync_inodes_sb ( sb ) ;
up_read ( & sb - > s_umount ) ;
}
}
2012-10-08 14:56:04 +04:00
/*
* Flush all dirty data to disk . Must not be called while holding an XFS_ILOCK
* or a page lock . We use sync_inodes_sb ( ) here to ensure we block while waiting
* for IO to complete so that we effectively throttle multiple callers to the
* rate at which IO is completing .
*/
void
xfs_flush_inodes (
struct xfs_mount * mp )
{
2020-04-12 23:11:10 +03:00
/*
* If flush_work ( ) returns true then that means we waited for a flush
* which was already in progress . Don ' t bother running another scan .
*/
if ( flush_work ( & mp - > m_flush_inodes_work ) )
2020-03-27 18:49:44 +03:00
return ;
2020-04-12 23:11:10 +03:00
queue_work ( mp - > m_sync_workqueue , & mp - > m_flush_inodes_work ) ;
flush_work ( & mp - > m_flush_inodes_work ) ;
2012-10-08 14:56:04 +04:00
}
2008-10-30 09:36:14 +03:00
/* Catch misguided souls that try to use this interface on XFS */
2005-04-17 02:20:36 +04:00
STATIC struct inode *
2006-03-14 06:06:18 +03:00
xfs_fs_alloc_inode (
2005-04-17 02:20:36 +04:00
struct super_block * sb )
{
2008-10-30 09:36:14 +03:00
BUG ( ) ;
2008-10-30 09:36:52 +03:00
return NULL ;
2005-04-17 02:20:36 +04:00
}
2008-10-30 09:36:14 +03:00
/*
2008-10-30 09:36:40 +03:00
* Now that the generic code is guaranteed not to be accessing
2016-05-18 06:52:42 +03:00
* the linux inode , we can inactivate and reclaim the inode .
2008-10-30 09:36:14 +03:00
*/
2005-04-17 02:20:36 +04:00
STATIC void
2006-03-14 06:06:18 +03:00
xfs_fs_destroy_inode (
2009-09-29 17:48:56 +04:00
struct inode * inode )
2005-04-17 02:20:36 +04:00
{
2009-09-29 17:48:56 +04:00
struct xfs_inode * ip = XFS_I ( inode ) ;
2010-06-24 05:57:09 +04:00
trace_xfs_destroy_inode ( ip ) ;
2008-10-30 09:36:40 +03:00
2016-11-30 06:33:25 +03:00
ASSERT ( ! rwsem_is_locked ( & inode - > i_rwsem ) ) ;
2016-05-18 06:52:42 +03:00
XFS_STATS_INC ( ip - > i_mount , vn_rele ) ;
XFS_STATS_INC ( ip - > i_mount , vn_remove ) ;
2021-05-31 21:32:02 +03:00
xfs_inode_mark_reclaimable ( ip ) ;
2005-04-17 02:20:36 +04:00
}
2018-03-07 04:04:00 +03:00
static void
xfs_fs_dirty_inode (
struct inode * inode ,
2022-08-25 13:06:57 +03:00
int flags )
2018-03-07 04:04:00 +03:00
{
struct xfs_inode * ip = XFS_I ( inode ) ;
struct xfs_mount * mp = ip - > i_mount ;
struct xfs_trans * tp ;
if ( ! ( inode - > i_sb - > s_flags & SB_LAZYTIME ) )
return ;
2022-08-25 13:06:57 +03:00
/*
* Only do the timestamp update if the inode is dirty ( I_DIRTY_SYNC )
* and has dirty timestamp ( I_DIRTY_TIME ) . I_DIRTY_TIME can be passed
* in flags possibly together with I_DIRTY_SYNC .
*/
if ( ( flags & ~ I_DIRTY_TIME ) ! = I_DIRTY_SYNC | | ! ( flags & I_DIRTY_TIME ) )
2018-03-07 04:04:00 +03:00
return ;
if ( xfs_trans_alloc ( mp , & M_RES ( mp ) - > tr_fsyncts , 0 , 0 , 0 , & tp ) )
return ;
xfs_ilock ( ip , XFS_ILOCK_EXCL ) ;
xfs_trans_ijoin ( tp , ip , XFS_ILOCK_EXCL ) ;
xfs_trans_log_inode ( tp , ip , XFS_ILOG_TIMESTAMP ) ;
xfs_trans_commit ( tp ) ;
}
2008-10-30 08:11:59 +03:00
/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 *
 * @inode is an untyped pointer that is used directly as a
 * struct xfs_inode.
 */
2008-10-30 09:36:14 +03:00
STATIC void
xfs_fs_inode_init_once (
2008-10-30 08:11:59 +03:00
void * inode )
{
struct xfs_inode * ip = inode ;
/* start from an all-zero object; everything below builds on that */
memset ( ip , 0 , sizeof ( struct xfs_inode ) ) ;
2008-10-30 09:36:14 +03:00
/* vfs inode */
inode_init_once ( VFS_I ( ip ) ) ;
/* xfs inode */
2008-10-30 08:11:59 +03:00
atomic_set ( & ip - > i_pincount , 0 ) ;
spin_lock_init ( & ip - > i_flags_lock ) ;
/* ip->i_ino is still zero here because of the memset above */
mrlock_init ( & ip - > i_lock , MRLOCK_ALLOW_EQUAL_PRI | MRLOCK_BARRIER ,
" xfsino " , ip - > i_ino ) ;
}
2012-03-22 09:15:10 +04:00
/*
* We do an unlocked check for XFS_IDONTCACHE here because we are already
* serialised against cache hits here via the inode - > i_lock and igrab ( ) in
* xfs_iget_cache_hit ( ) . Hence a lookup that might clear this flag will not be
* racing with us , and it avoids needing to grab a spinlock here for every inode
* we drop the final reference on .
*/
STATIC int
xfs_fs_drop_inode (
struct inode * inode )
{
struct xfs_inode * ip = XFS_I ( inode ) ;
2016-10-03 19:11:29 +03:00
/*
* If this unlinked inode is in the middle of recovery , don ' t
* drop the inode just yet ; log recovery will take care of
* that . See the comment for this inode flag .
*/
if ( ip - > i_flags & XFS_IRECOVERY ) {
2021-08-11 03:59:02 +03:00
ASSERT ( xlog_recovery_needed ( ip - > i_mount - > m_log ) ) ;
2016-10-03 19:11:29 +03:00
return 0 ;
}
2020-04-30 17:41:37 +03:00
return generic_drop_inode ( inode ) ;
2012-03-22 09:15:10 +04:00
}
2019-11-05 00:58:42 +03:00
static void
xfs_mount_free (
2008-08-13 10:04:05 +04:00
struct xfs_mount * mp )
{
kfree ( mp - > m_rtname ) ;
kfree ( mp - > m_logname ) ;
2019-11-05 00:58:42 +03:00
kmem_free ( mp ) ;
2008-08-13 10:04:05 +04:00
}
2005-04-17 02:20:36 +04:00
STATIC int
2009-10-07 00:29:28 +04:00
xfs_fs_sync_fs (
2005-04-17 02:20:36 +04:00
struct super_block * sb ,
int wait )
{
2007-08-30 11:20:39 +04:00
struct xfs_mount * mp = XFS_M ( sb ) ;
2022-01-30 19:53:17 +03:00
int error ;
2005-04-17 02:20:36 +04:00
2021-08-06 21:05:39 +03:00
trace_xfs_fs_sync_fs ( mp , __return_address ) ;
2007-10-12 05:13:35 +04:00
/*
2011-12-07 01:58:12 +04:00
* Doing anything during the async pass would be counterproductive .
2007-10-12 05:13:35 +04:00
*/
2011-12-07 01:58:12 +04:00
if ( ! wait )
2009-10-07 00:29:28 +04:00
return 0 ;
2022-01-30 19:53:17 +03:00
error = xfs_log_force ( mp , XFS_LOG_SYNC ) ;
if ( error )
return error ;
2009-10-07 00:29:28 +04:00
if ( laptop_mode ) {
2005-04-17 02:20:36 +04:00
/*
* The disk must be active because we ' re syncing .
2012-10-08 14:56:02 +04:00
* We schedule log work now ( now that the disk is
2005-04-17 02:20:36 +04:00
* active ) instead of later ( when it might not be ) .
*/
2012-10-08 14:56:02 +04:00
flush_delayed_work ( & mp - > m_log - > l_work ) ;
2005-04-17 02:20:36 +04:00
}
2021-08-06 21:05:39 +03:00
/*
* If we are called with page faults frozen out , it means we are about
* to freeze the transaction subsystem . Take the opportunity to shut
* down inodegc because once SB_FREEZE_FS is set it ' s too late to
* prevent inactivation races with freeze . The fs doesn ' t get called
* again by the freezing process until after SB_FREEZE_FS has been set ,
2021-08-06 21:05:42 +03:00
* so it ' s now or never . Same logic applies to speculative allocation
* garbage collection .
2021-08-06 21:05:39 +03:00
*
* We don ' t care if this is a normal syncfs call that does this or
* freeze that does this - we can run this multiple times without issue
* and we won ' t race with a restart because a restart can only occur
* when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE .
*/
2021-08-06 21:05:42 +03:00
if ( sb - > s_writers . frozen = = SB_FREEZE_PAGEFAULT ) {
2021-08-06 21:05:39 +03:00
xfs_inodegc_stop ( mp ) ;
2021-08-06 21:05:42 +03:00
xfs_blockgc_stop ( mp ) ;
}
2021-08-06 21:05:39 +03:00
2009-10-07 00:29:28 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
STATIC int
2006-03-14 06:06:18 +03:00
xfs_fs_statfs (
2006-06-23 13:02:58 +04:00
struct dentry * dentry ,
2005-04-17 02:20:36 +04:00
struct kstatfs * statp )
{
2007-10-11 12:09:40 +04:00
struct xfs_mount * mp = XFS_M ( dentry - > d_sb ) ;
xfs_sb_t * sbp = & mp - > m_sb ;
2015-03-18 01:25:59 +03:00
struct xfs_inode * ip = XFS_I ( d_inode ( dentry ) ) ;
2017-06-16 21:00:05 +03:00
uint64_t fakeinos , id ;
uint64_t icount ;
uint64_t ifree ;
uint64_t fdblocks ;
2007-10-11 12:09:40 +04:00
xfs_extlen_t lsize ;
2017-06-16 21:00:05 +03:00
int64_t ffree ;
2007-10-11 12:09:40 +04:00
2022-06-16 17:44:32 +03:00
/*
* Expedite background inodegc but don ' t wait . We do not want to block
* here waiting hours for a billion extent file to be truncated .
*/
xfs_inodegc_push ( mp ) ;
2021-08-06 21:05:42 +03:00
2018-10-18 09:20:19 +03:00
statp - > f_type = XFS_SUPER_MAGIC ;
2007-10-11 12:09:40 +04:00
statp - > f_namelen = MAXNAMELEN - 1 ;
id = huge_encode_dev ( mp - > m_ddev_targp - > bt_dev ) ;
2020-09-18 23:45:50 +03:00
statp - > f_fsid = u64_to_fsid ( id ) ;
2007-10-11 12:09:40 +04:00
2015-02-23 13:19:28 +03:00
icount = percpu_counter_sum ( & mp - > m_icount ) ;
2015-02-23 13:19:53 +03:00
ifree = percpu_counter_sum ( & mp - > m_ifree ) ;
2015-02-23 13:22:03 +03:00
fdblocks = percpu_counter_sum ( & mp - > m_fdblocks ) ;
2007-10-11 12:09:40 +04:00
spin_lock ( & mp - > m_sb_lock ) ;
statp - > f_bsize = sbp - > sb_blocksize ;
lsize = sbp - > sb_logstart ? sbp - > sb_logblocks : 0 ;
statp - > f_blocks = sbp - > sb_dblocks - lsize ;
2015-02-23 13:22:03 +03:00
spin_unlock ( & mp - > m_sb_lock ) ;
2020-05-12 21:48:35 +03:00
/* make sure statp->f_bfree does not underflow */
2022-03-16 23:38:43 +03:00
statp - > f_bfree = max_t ( int64_t , 0 ,
fdblocks - xfs_fdblocks_unavailable ( mp ) ) ;
2015-02-23 13:22:03 +03:00
statp - > f_bavail = statp - > f_bfree ;
2018-12-12 19:46:24 +03:00
fakeinos = XFS_FSB_TO_INO ( mp , statp - > f_bfree ) ;
2018-06-07 17:54:02 +03:00
statp - > f_files = min ( icount + fakeinos , ( uint64_t ) XFS_MAXINUMBER ) ;
2019-06-05 21:19:34 +03:00
if ( M_IGEO ( mp ) - > maxicount )
2009-03-29 11:51:08 +04:00
statp - > f_files = min_t ( typeof ( statp - > f_files ) ,
statp - > f_files ,
2019-06-05 21:19:34 +03:00
M_IGEO ( mp ) - > maxicount ) ;
2010-08-24 05:46:05 +04:00
2015-02-06 01:53:02 +03:00
/* If sb_icount overshot maxicount, report actual allocation */
statp - > f_files = max_t ( typeof ( statp - > f_files ) ,
statp - > f_files ,
sbp - > sb_icount ) ;
2010-08-24 05:46:05 +04:00
/* make sure statp->f_ffree does not underflow */
2015-02-23 13:19:53 +03:00
ffree = statp - > f_files - ( icount - ifree ) ;
2017-06-16 21:00:05 +03:00
statp - > f_ffree = max_t ( int64_t , ffree , 0 ) ;
2010-08-24 05:46:05 +04:00
2007-10-11 12:09:40 +04:00
2021-03-29 21:11:44 +03:00
if ( ( ip - > i_diflags & XFS_DIFLAG_PROJINHERIT ) & &
2013-06-28 02:25:10 +04:00
( ( mp - > m_qflags & ( XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD ) ) ) = =
( XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD ) )
2009-06-08 17:33:32 +04:00
xfs_qm_statvfs ( ip , statp ) ;
2018-01-08 21:41:33 +03:00
if ( XFS_IS_REALTIME_MOUNT ( mp ) & &
2021-03-29 21:11:44 +03:00
( ip - > i_diflags & ( XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME ) ) ) {
2022-04-11 23:49:42 +03:00
s64 freertx ;
2018-01-08 21:41:33 +03:00
statp - > f_blocks = sbp - > sb_rblocks ;
2022-04-11 23:49:42 +03:00
freertx = percpu_counter_sum_positive ( & mp - > m_frextents ) ;
statp - > f_bavail = statp - > f_bfree = freertx * sbp - > sb_rextsize ;
2018-01-08 21:41:33 +03:00
}
2007-10-11 12:09:40 +04:00
return 0 ;
2005-04-17 02:20:36 +04:00
}
2010-02-06 01:59:53 +03:00
STATIC void
xfs_save_resvblks ( struct xfs_mount * mp )
{
2017-06-16 21:00:05 +03:00
uint64_t resblks = 0 ;
2010-02-06 01:59:53 +03:00
mp - > m_resblks_save = mp - > m_resblks ;
xfs_reserve_blocks ( mp , & resblks , NULL ) ;
}
STATIC void
xfs_restore_resvblks ( struct xfs_mount * mp )
{
2017-06-16 21:00:05 +03:00
uint64_t resblks ;
2010-02-06 01:59:53 +03:00
if ( mp - > m_resblks_save ) {
resblks = mp - > m_resblks_save ;
mp - > m_resblks_save = 0 ;
} else
resblks = xfs_default_resblks ( mp ) ;
xfs_reserve_blocks ( mp , & resblks , NULL ) ;
}
2019-11-05 00:58:48 +03:00
/*
* Second stage of a freeze . The data is already frozen so we only
* need to take care of the metadata . Once that ' s done sync the superblock
* to the log to dirty it in case of a crash while frozen . This ensures that we
* will recover the unlinked inode lists on the next mount .
*/
STATIC int
xfs_fs_freeze (
struct super_block * sb )
{
struct xfs_mount * mp = XFS_M ( sb ) ;
xfs: Fix false positive lockdep warning with sb_internal & fs_reclaim
Depending on the workloads, the following circular locking dependency
warning between sb_internal (a percpu rwsem) and fs_reclaim (a pseudo
lock) may show up:
======================================================
WARNING: possible circular locking dependency detected
5.0.0-rc1+ #60 Tainted: G W
------------------------------------------------------
fsfreeze/4346 is trying to acquire lock:
0000000026f1d784 (fs_reclaim){+.+.}, at:
fs_reclaim_acquire.part.19+0x5/0x30
but task is already holding lock:
0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
which lock already depends on the new lock.
:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(sb_internal);
lock(fs_reclaim);
lock(sb_internal);
lock(fs_reclaim);
*** DEADLOCK ***
4 locks held by fsfreeze/4346:
#0: 00000000b478ef56 (sb_writers#8){++++}, at: percpu_down_write+0xb4/0x650
#1: 000000001ec487a9 (&type->s_umount_key#28){++++}, at: freeze_super+0xda/0x290
#2: 000000003edbd5a0 (sb_pagefaults){++++}, at: percpu_down_write+0xb4/0x650
#3: 0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
stack backtrace:
Call Trace:
dump_stack+0xe0/0x19a
print_circular_bug.isra.10.cold.34+0x2f4/0x435
check_prev_add.constprop.19+0xca1/0x15f0
validate_chain.isra.14+0x11af/0x3b50
__lock_acquire+0x728/0x1200
lock_acquire+0x269/0x5a0
fs_reclaim_acquire.part.19+0x29/0x30
fs_reclaim_acquire+0x19/0x20
kmem_cache_alloc+0x3e/0x3f0
kmem_zone_alloc+0x79/0x150
xfs_trans_alloc+0xfa/0x9d0
xfs_sync_sb+0x86/0x170
xfs_log_sbcount+0x10f/0x140
xfs_quiesce_attr+0x134/0x270
xfs_fs_freeze+0x4a/0x70
freeze_super+0x1af/0x290
do_vfs_ioctl+0xedc/0x16c0
ksys_ioctl+0x41/0x80
__x64_sys_ioctl+0x73/0xa9
do_syscall_64+0x18f/0xd23
entry_SYSCALL_64_after_hwframe+0x49/0xbe
This is a false positive as all the dirty pages are flushed out before
the filesystem can be frozen.
One way to avoid this splat is to add GFP_NOFS to the affected allocation
calls by using the memalloc_nofs_save()/memalloc_nofs_restore() pair.
This shouldn't matter unless the system is really running out of memory.
In that particular case, the filesystem freeze operation may fail while
it was succeeding previously.
Without this patch, the command sequence below will show that the lock
dependency chain sb_internal -> fs_reclaim exists.
# fsfreeze -f /home
# fsfreeze --unfreeze /home
# grep -i fs_reclaim -C 3 /proc/lockdep_chains | grep -C 5 sb_internal
After applying the patch, such sb_internal -> fs_reclaim lock dependency
chain can no longer be found. Because of that, the locking dependency
warning will not be shown.
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2020-07-08 20:21:44 +03:00
unsigned int flags ;
int ret ;
2019-11-05 00:58:48 +03:00
xfs: Fix false positive lockdep warning with sb_internal & fs_reclaim
Depending on the workloads, the following circular locking dependency
warning between sb_internal (a percpu rwsem) and fs_reclaim (a pseudo
lock) may show up:
======================================================
WARNING: possible circular locking dependency detected
5.0.0-rc1+ #60 Tainted: G W
------------------------------------------------------
fsfreeze/4346 is trying to acquire lock:
0000000026f1d784 (fs_reclaim){+.+.}, at:
fs_reclaim_acquire.part.19+0x5/0x30
but task is already holding lock:
0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
which lock already depends on the new lock.
:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(sb_internal);
lock(fs_reclaim);
lock(sb_internal);
lock(fs_reclaim);
*** DEADLOCK ***
4 locks held by fsfreeze/4346:
#0: 00000000b478ef56 (sb_writers#8){++++}, at: percpu_down_write+0xb4/0x650
#1: 000000001ec487a9 (&type->s_umount_key#28){++++}, at: freeze_super+0xda/0x290
#2: 000000003edbd5a0 (sb_pagefaults){++++}, at: percpu_down_write+0xb4/0x650
#3: 0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
stack backtrace:
Call Trace:
dump_stack+0xe0/0x19a
print_circular_bug.isra.10.cold.34+0x2f4/0x435
check_prev_add.constprop.19+0xca1/0x15f0
validate_chain.isra.14+0x11af/0x3b50
__lock_acquire+0x728/0x1200
lock_acquire+0x269/0x5a0
fs_reclaim_acquire.part.19+0x29/0x30
fs_reclaim_acquire+0x19/0x20
kmem_cache_alloc+0x3e/0x3f0
kmem_zone_alloc+0x79/0x150
xfs_trans_alloc+0xfa/0x9d0
xfs_sync_sb+0x86/0x170
xfs_log_sbcount+0x10f/0x140
xfs_quiesce_attr+0x134/0x270
xfs_fs_freeze+0x4a/0x70
freeze_super+0x1af/0x290
do_vfs_ioctl+0xedc/0x16c0
ksys_ioctl+0x41/0x80
__x64_sys_ioctl+0x73/0xa9
do_syscall_64+0x18f/0xd23
entry_SYSCALL_64_after_hwframe+0x49/0xbe
This is a false positive as all the dirty pages are flushed out before
the filesystem can be frozen.
One way to avoid this splat is to add GFP_NOFS to the affected allocation
calls by using the memalloc_nofs_save()/memalloc_nofs_restore() pair.
This shouldn't matter unless the system is really running out of memory.
In that particular case, the filesystem freeze operation may fail while
it was succeeding previously.
Without this patch, the command sequence below will show that the lock
dependency chain sb_internal -> fs_reclaim exists.
# fsfreeze -f /home
# fsfreeze --unfreeze /home
# grep -i fs_reclaim -C 3 /proc/lockdep_chains | grep -C 5 sb_internal
After applying the patch, such sb_internal -> fs_reclaim lock dependency
chain can no longer be found. Because of that, the locking dependency
warning will not be shown.
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2020-07-08 20:21:44 +03:00
/*
* The filesystem is now frozen far enough that memory reclaim
* cannot safely operate on the filesystem . Hence we need to
* set a GFP_NOFS context here to avoid recursion deadlocks .
*/
flags = memalloc_nofs_save ( ) ;
2019-11-05 00:58:48 +03:00
xfs_save_resvblks ( mp ) ;
2021-01-23 03:48:24 +03:00
ret = xfs_log_quiesce ( mp ) ;
xfs: Fix false positive lockdep warning with sb_internal & fs_reclaim
Depending on the workloads, the following circular locking dependency
warning between sb_internal (a percpu rwsem) and fs_reclaim (a pseudo
lock) may show up:
======================================================
WARNING: possible circular locking dependency detected
5.0.0-rc1+ #60 Tainted: G W
------------------------------------------------------
fsfreeze/4346 is trying to acquire lock:
0000000026f1d784 (fs_reclaim){+.+.}, at:
fs_reclaim_acquire.part.19+0x5/0x30
but task is already holding lock:
0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
which lock already depends on the new lock.
:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(sb_internal);
lock(fs_reclaim);
lock(sb_internal);
lock(fs_reclaim);
*** DEADLOCK ***
4 locks held by fsfreeze/4346:
#0: 00000000b478ef56 (sb_writers#8){++++}, at: percpu_down_write+0xb4/0x650
#1: 000000001ec487a9 (&type->s_umount_key#28){++++}, at: freeze_super+0xda/0x290
#2: 000000003edbd5a0 (sb_pagefaults){++++}, at: percpu_down_write+0xb4/0x650
#3: 0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
stack backtrace:
Call Trace:
dump_stack+0xe0/0x19a
print_circular_bug.isra.10.cold.34+0x2f4/0x435
check_prev_add.constprop.19+0xca1/0x15f0
validate_chain.isra.14+0x11af/0x3b50
__lock_acquire+0x728/0x1200
lock_acquire+0x269/0x5a0
fs_reclaim_acquire.part.19+0x29/0x30
fs_reclaim_acquire+0x19/0x20
kmem_cache_alloc+0x3e/0x3f0
kmem_zone_alloc+0x79/0x150
xfs_trans_alloc+0xfa/0x9d0
xfs_sync_sb+0x86/0x170
xfs_log_sbcount+0x10f/0x140
xfs_quiesce_attr+0x134/0x270
xfs_fs_freeze+0x4a/0x70
freeze_super+0x1af/0x290
do_vfs_ioctl+0xedc/0x16c0
ksys_ioctl+0x41/0x80
__x64_sys_ioctl+0x73/0xa9
do_syscall_64+0x18f/0xd23
entry_SYSCALL_64_after_hwframe+0x49/0xbe
This is a false positive as all the dirty pages are flushed out before
the filesystem can be frozen.
One way to avoid this splat is to add GFP_NOFS to the affected allocation
calls by using the memalloc_nofs_save()/memalloc_nofs_restore() pair.
This shouldn't matter unless the system is really running out of memory.
In that particular case, the filesystem freeze operation may fail while
it was succeeding previously.
Without this patch, the command sequence below will show that the lock
dependency chain sb_internal -> fs_reclaim exists.
# fsfreeze -f /home
# fsfreeze --unfreeze /home
# grep -i fs_reclaim -C 3 /proc/lockdep_chains | grep -C 5 sb_internal
After applying the patch, such sb_internal -> fs_reclaim lock dependency
chain can no longer be found. Because of that, the locking dependency
warning will not be shown.
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2020-07-08 20:21:44 +03:00
memalloc_nofs_restore ( flags ) ;
2021-08-06 21:05:39 +03:00
/*
* For read - write filesystems , we need to restart the inodegc on error
* because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
* going to be run to restart it now . We are at SB_FREEZE_FS level
* here , so we can restart safely without racing with a stop in
* xfs_fs_sync_fs ( ) .
*/
2021-08-19 04:46:52 +03:00
if ( ret & & ! xfs_is_readonly ( mp ) ) {
2021-08-06 21:05:42 +03:00
xfs_blockgc_start ( mp ) ;
2021-08-06 21:05:39 +03:00
xfs_inodegc_start ( mp ) ;
2021-08-06 21:05:42 +03:00
}
2021-08-06 21:05:39 +03:00
xfs: Fix false positive lockdep warning with sb_internal & fs_reclaim
Depending on the workloads, the following circular locking dependency
warning between sb_internal (a percpu rwsem) and fs_reclaim (a pseudo
lock) may show up:
======================================================
WARNING: possible circular locking dependency detected
5.0.0-rc1+ #60 Tainted: G W
------------------------------------------------------
fsfreeze/4346 is trying to acquire lock:
0000000026f1d784 (fs_reclaim){+.+.}, at:
fs_reclaim_acquire.part.19+0x5/0x30
but task is already holding lock:
0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
which lock already depends on the new lock.
:
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(sb_internal);
lock(fs_reclaim);
lock(sb_internal);
lock(fs_reclaim);
*** DEADLOCK ***
4 locks held by fsfreeze/4346:
#0: 00000000b478ef56 (sb_writers#8){++++}, at: percpu_down_write+0xb4/0x650
#1: 000000001ec487a9 (&type->s_umount_key#28){++++}, at: freeze_super+0xda/0x290
#2: 000000003edbd5a0 (sb_pagefaults){++++}, at: percpu_down_write+0xb4/0x650
#3: 0000000072bfc54b (sb_internal){++++}, at: percpu_down_write+0xb4/0x650
stack backtrace:
Call Trace:
dump_stack+0xe0/0x19a
print_circular_bug.isra.10.cold.34+0x2f4/0x435
check_prev_add.constprop.19+0xca1/0x15f0
validate_chain.isra.14+0x11af/0x3b50
__lock_acquire+0x728/0x1200
lock_acquire+0x269/0x5a0
fs_reclaim_acquire.part.19+0x29/0x30
fs_reclaim_acquire+0x19/0x20
kmem_cache_alloc+0x3e/0x3f0
kmem_zone_alloc+0x79/0x150
xfs_trans_alloc+0xfa/0x9d0
xfs_sync_sb+0x86/0x170
xfs_log_sbcount+0x10f/0x140
xfs_quiesce_attr+0x134/0x270
xfs_fs_freeze+0x4a/0x70
freeze_super+0x1af/0x290
do_vfs_ioctl+0xedc/0x16c0
ksys_ioctl+0x41/0x80
__x64_sys_ioctl+0x73/0xa9
do_syscall_64+0x18f/0xd23
entry_SYSCALL_64_after_hwframe+0x49/0xbe
This is a false positive as all the dirty pages are flushed out before
the filesystem can be frozen.
One way to avoid this splat is to add GFP_NOFS to the affected allocation
calls by using the memalloc_nofs_save()/memalloc_nofs_restore() pair.
This shouldn't matter unless the system is really running out of memory.
In that particular case, the filesystem freeze operation may fail while
it was succeeding previously.
Without this patch, the command sequence below will show that the lock
dependency chain sb_internal -> fs_reclaim exists.
# fsfreeze -f /home
# fsfreeze --unfreeze /home
# grep -i fs_reclaim -C 3 /proc/lockdep_chains | grep -C 5 sb_internal
After applying the patch, such sb_internal -> fs_reclaim lock dependency
chain can no longer be found. Because of that, the locking dependency
warning will not be shown.
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
2020-07-08 20:21:44 +03:00
return ret ;
2019-11-05 00:58:48 +03:00
}
/*
 * Thaw the filesystem: restore the block reservation, queue the log
 * work again and, on read-write mounts, restart the background blockgc
 * and inodegc workers.  Always returns 0.
 */
STATIC int
xfs_fs_unfreeze(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);

	/*
	 * The inodegc worker stays off on a readonly filesystem because
	 * inodes are sent directly to reclaim there.  The blockgc worker
	 * stays off as well because a readonly filesystem has no
	 * speculative preallocations.
	 */
	if (xfs_is_readonly(mp))
		return 0;

	xfs_blockgc_start(mp);
	xfs_inodegc_start(mp);
	return 0;
}
/*
 * Cross-check the mount options against the superblock, which has already
 * been read in by the time this runs.
 *
 * The only field written here is mp->m_logbsize, and only in the first
 * branch below (v2 log, no positive logbuf size set, log stripe unit
 * larger than XLOG_BIG_RECORD_BSIZE).  Every other check either passes
 * or fails the mount with a warning.
 *
 * Returns 0 if the combination is acceptable, -EINVAL for an invalid
 * logbuf size, noattr2 on a V5 filesystem or an unsupported group+project
 * quota combination, and -EROFS for a read-write mount of a filesystem
 * whose superblock is flagged read-only.
 */
STATIC int
xfs_finish_flags (
struct xfs_mount * mp )
{
/* Fail a mount where the logbuf is smaller than the log stripe */
2021-08-19 04:46:37 +03:00
if ( xfs_has_logv2 ( mp ) ) {
2019-11-05 00:58:48 +03:00
/* no positive logbuf size set: adopt the log stripe unit when it is large */
if ( mp - > m_logbsize < = 0 & &
mp - > m_sb . sb_logsunit > XLOG_BIG_RECORD_BSIZE ) {
mp - > m_logbsize = mp - > m_sb . sb_logsunit ;
} else if ( mp - > m_logbsize > 0 & &
mp - > m_logbsize < mp - > m_sb . sb_logsunit ) {
xfs_warn ( mp ,
" logbuf size must be greater than or equal to log stripe size " ) ;
return - EINVAL ;
}
} else {
/* Fail a mount if the logbuf is larger than 32K */
if ( mp - > m_logbsize > XLOG_BIG_RECORD_BSIZE ) {
xfs_warn ( mp ,
" logbuf size for version 1 logs must be 16K or 32K " ) ;
return - EINVAL ;
}
}
/*
 * V5 filesystems always use attr2 format for attributes.
 */
2021-08-19 04:46:52 +03:00
if ( xfs_has_crc ( mp ) & & xfs_has_noattr2 ( mp ) ) {
2019-11-05 00:58:48 +03:00
xfs_warn ( mp , " Cannot mount a V5 filesystem as noattr2. "
" attr2 is always enabled for V5 filesystems. " ) ;
return - EINVAL ;
}
/*
 * Prohibit read-write mounts of read-only filesystems.
 */
2021-08-19 04:46:52 +03:00
if ( ( mp - > m_sb . sb_flags & XFS_SBF_READONLY ) & & ! xfs_is_readonly ( mp ) ) {
2019-11-05 00:58:48 +03:00
xfs_warn ( mp ,
" cannot mount a read-only filesystem as read-write " ) ;
return - EROFS ;
}
2021-08-06 21:05:37 +03:00
/* group and project quota accounting together require xfs_has_pquotino() */
if ( ( mp - > m_qflags & XFS_GQUOTA_ACCT ) & &
( mp - > m_qflags & XFS_PQUOTA_ACCT ) & &
2021-08-19 04:46:37 +03:00
! xfs_has_pquotino ( mp ) ) {
2019-11-05 00:58:48 +03:00
xfs_warn ( mp ,
" Super block does not support project and group quota together " ) ;
return - EINVAL ;
}
return 0 ;
}
static int
xfs_init_percpu_counters (
struct xfs_mount * mp )
{
int error ;
error = percpu_counter_init ( & mp - > m_icount , 0 , GFP_KERNEL ) ;
if ( error )
return - ENOMEM ;
error = percpu_counter_init ( & mp - > m_ifree , 0 , GFP_KERNEL ) ;
if ( error )
goto free_icount ;
error = percpu_counter_init ( & mp - > m_fdblocks , 0 , GFP_KERNEL ) ;
if ( error )
goto free_ifree ;
error = percpu_counter_init ( & mp - > m_delalloc_blks , 0 , GFP_KERNEL ) ;
if ( error )
goto free_fdblocks ;
2022-04-11 23:49:42 +03:00
error = percpu_counter_init ( & mp - > m_frextents , 0 , GFP_KERNEL ) ;
if ( error )
goto free_delalloc ;
2019-11-05 00:58:48 +03:00
return 0 ;
2022-04-11 23:49:42 +03:00
free_delalloc :
percpu_counter_destroy ( & mp - > m_delalloc_blks ) ;
2019-11-05 00:58:48 +03:00
free_fdblocks :
percpu_counter_destroy ( & mp - > m_fdblocks ) ;
free_ifree :
percpu_counter_destroy ( & mp - > m_ifree ) ;
free_icount :
percpu_counter_destroy ( & mp - > m_icount ) ;
return - ENOMEM ;
}
void
xfs_reinit_percpu_counters (
struct xfs_mount * mp )
{
percpu_counter_set ( & mp - > m_icount , mp - > m_sb . sb_icount ) ;
percpu_counter_set ( & mp - > m_ifree , mp - > m_sb . sb_ifree ) ;
percpu_counter_set ( & mp - > m_fdblocks , mp - > m_sb . sb_fdblocks ) ;
2022-04-11 23:49:42 +03:00
percpu_counter_set ( & mp - > m_frextents , mp - > m_sb . sb_frextents ) ;
2019-11-05 00:58:48 +03:00
}
static void
xfs_destroy_percpu_counters (
struct xfs_mount * mp )
{
percpu_counter_destroy ( & mp - > m_icount ) ;
percpu_counter_destroy ( & mp - > m_ifree ) ;
percpu_counter_destroy ( & mp - > m_fdblocks ) ;
2021-08-19 04:46:53 +03:00
ASSERT ( xfs_is_shutdown ( mp ) | |
2019-11-05 00:58:48 +03:00
percpu_counter_sum ( & mp - > m_delalloc_blks ) = = 0 ) ;
percpu_counter_destroy ( & mp - > m_delalloc_blks ) ;
2022-04-11 23:49:42 +03:00
percpu_counter_destroy ( & mp - > m_frextents ) ;
2019-11-05 00:58:48 +03:00
}
2021-08-06 21:05:39 +03:00
static int
xfs_inodegc_init_percpu (
struct xfs_mount * mp )
{
struct xfs_inodegc * gc ;
int cpu ;
mp - > m_inodegc = alloc_percpu ( struct xfs_inodegc ) ;
if ( ! mp - > m_inodegc )
return - ENOMEM ;
for_each_possible_cpu ( cpu ) {
gc = per_cpu_ptr ( mp - > m_inodegc , cpu ) ;
init_llist_head ( & gc - > list ) ;
gc - > items = 0 ;
2022-06-16 17:44:31 +03:00
INIT_DELAYED_WORK ( & gc - > work , xfs_inodegc_worker ) ;
2021-08-06 21:05:39 +03:00
}
return 0 ;
}
static void
xfs_inodegc_free_percpu (
struct xfs_mount * mp )
{
if ( ! mp - > m_inodegc )
return ;
free_percpu ( mp - > m_inodegc ) ;
}
2019-11-05 00:58:48 +03:00
static void
xfs_fs_put_super (
struct super_block * sb )
{
struct xfs_mount * mp = XFS_M ( sb ) ;
/* if ->fill_super failed, we have no mount to tear down */
if ( ! sb - > s_fs_info )
return ;
2022-11-17 06:20:21 +03:00
xfs_notice ( mp , " Unmounting Filesystem %pU " , & mp - > m_sb . sb_uuid ) ;
2019-11-05 00:58:48 +03:00
xfs_filestream_unmount ( mp ) ;
xfs_unmountfs ( mp ) ;
xfs_freesb ( mp ) ;
free_percpu ( mp - > m_stats . xs_stats ) ;
2021-08-06 21:05:38 +03:00
xfs_mount_list_del ( mp ) ;
2021-08-06 21:05:39 +03:00
xfs_inodegc_free_percpu ( mp ) ;
2019-11-05 00:58:48 +03:00
xfs_destroy_percpu_counters ( mp ) ;
xfs_destroy_mount_workqueues ( mp ) ;
xfs_close_devices ( mp ) ;
sb - > s_fs_info = NULL ;
xfs_mount_free ( mp ) ;
}
static long
xfs_fs_nr_cached_objects (
struct super_block * sb ,
struct shrink_control * sc )
{
/* Paranoia: catch incorrect calls during mount setup or teardown */
if ( WARN_ON_ONCE ( ! sb - > s_fs_info ) )
return 0 ;
return xfs_reclaim_inodes_count ( XFS_M ( sb ) ) ;
}
static long
xfs_fs_free_cached_objects (
struct super_block * sb ,
struct shrink_control * sc )
2012-10-08 14:56:07 +04:00
{
2019-11-05 00:58:48 +03:00
return xfs_reclaim_inodes_nr ( XFS_M ( sb ) , sc - > nr_to_scan ) ;
}
2012-10-08 14:56:07 +04:00
2019-11-05 00:58:48 +03:00
/* Superblock operations installed on sb->s_op by xfs_fs_fill_super(). */
static const struct super_operations xfs_super_operations = {
	/* inode callbacks */
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.dirty_inode		= xfs_fs_dirty_inode,
	.drop_inode		= xfs_fs_drop_inode,

	/* cached object accounting and freeing */
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,

	/* whole-filesystem callbacks */
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
};
2012-10-08 14:56:07 +04:00
2019-11-05 00:58:48 +03:00
/*
 * Parse an integer that may carry a binary size suffix.
 *
 * @s:    NUL-terminated option value, e.g. "32k" or "1M"
 * @base: numeric base handed to kstrtoint()
 * @res:  receives the scaled value; only written when 0 is returned
 *
 * A trailing 'K'/'k', 'M'/'m' or 'G'/'g' multiplies the parsed number by
 * 2^10, 2^20 or 2^30 respectively.
 *
 * Returns 0 on success, -ENOMEM if the working copy of @s cannot be
 * allocated, or -EINVAL if @s is empty, is not a valid integer, or the
 * scaled value does not fit in an int.
 */
static int
suffix_kstrtoint(
	const char	*s,
	unsigned int	base,
	int		*res)
{
	int		shift_left_factor = 0;
	int		_res = 0;
	long long	scaled;
	size_t		len;
	char		*value;
	int		ret = -EINVAL;

	value = kstrdup(s, GFP_KERNEL);
	if (!value)
		return -ENOMEM;

	/* An empty string has no final character to inspect. */
	len = strlen(value);
	if (!len)
		goto out_free;

	switch (value[len - 1]) {
	case 'K':
	case 'k':
		shift_left_factor = 10;
		break;
	case 'M':
	case 'm':
		shift_left_factor = 20;
		break;
	case 'G':
	case 'g':
		shift_left_factor = 30;
		break;
	default:
		break;
	}

	/* Strip the suffix so that only the number is left to parse. */
	if (shift_left_factor)
		value[len - 1] = '\0';

	if (kstrtoint(value, base, &_res))
		goto out_free;

	/*
	 * Scale in a wider type so that a result which does not fit in an
	 * int is rejected instead of silently wrapping.
	 */
	scaled = (long long)_res * (1LL << shift_left_factor);
	if (scaled != (int)scaled)
		goto out_free;

	*res = (int)scaled;
	ret = 0;

out_free:
	kfree(value);
	return ret;
}
2021-03-22 19:52:02 +03:00
/*
 * Warn that the mount option named by param->key is deprecated.
 *
 * @flag is the feature bit the option controls and @value is the state the
 * option asks for.  When reconfiguring an existing mount whose feature bit
 * is already in that state, the warning is not printed.
 */
static inline void
xfs_fs_warn_deprecated(
	struct fs_context	*fc,
	struct fs_parameter	*param,
	uint64_t		flag,
	bool			value)
{
	if (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) {
		struct xfs_mount	*cur_mp = XFS_M(fc->root->d_sb);
		bool			cur_value = !!(cur_mp->m_features & flag);

		/* The current mount already has the requested setting. */
		if (cur_value == value)
			return;
	}

	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}
2007-10-11 12:11:14 +04:00
/*
2019-11-05 00:58:48 +03:00
* Set mount state from a mount option .
*
* NOTE : mp - > m_super is NULL here !
2007-10-11 12:11:14 +04:00
*/
2019-11-05 00:58:48 +03:00
static int
2020-12-05 02:59:39 +03:00
xfs_fs_parse_param (
2019-11-05 00:58:48 +03:00
struct fs_context * fc ,
struct fs_parameter * param )
2005-04-17 02:20:36 +04:00
{
2021-03-22 19:52:01 +03:00
struct xfs_mount * parsing_mp = fc - > s_fs_info ;
2019-11-05 00:58:48 +03:00
struct fs_parse_result result ;
int size = 0 ;
int opt ;
2007-10-11 12:11:14 +04:00
2019-09-07 14:23:15 +03:00
opt = fs_parse ( fc , xfs_fs_parameters , param , & result ) ;
2019-11-05 00:58:48 +03:00
if ( opt < 0 )
return opt ;
2005-04-17 02:20:36 +04:00
2019-11-05 00:58:48 +03:00
switch ( opt ) {
case Opt_logbufs :
2021-03-22 19:52:01 +03:00
parsing_mp - > m_logbufs = result . uint_32 ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_logbsize :
2021-03-22 19:52:01 +03:00
if ( suffix_kstrtoint ( param - > string , 10 , & parsing_mp - > m_logbsize ) )
2019-11-05 00:58:48 +03:00
return - EINVAL ;
return 0 ;
case Opt_logdev :
2021-03-22 19:52:01 +03:00
kfree ( parsing_mp - > m_logname ) ;
parsing_mp - > m_logname = kstrdup ( param - > string , GFP_KERNEL ) ;
if ( ! parsing_mp - > m_logname )
2019-11-05 00:58:48 +03:00
return - ENOMEM ;
return 0 ;
case Opt_rtdev :
2021-03-22 19:52:01 +03:00
kfree ( parsing_mp - > m_rtname ) ;
parsing_mp - > m_rtname = kstrdup ( param - > string , GFP_KERNEL ) ;
if ( ! parsing_mp - > m_rtname )
2019-11-05 00:58:48 +03:00
return - ENOMEM ;
return 0 ;
case Opt_allocsize :
if ( suffix_kstrtoint ( param - > string , 10 , & size ) )
return - EINVAL ;
2021-03-22 19:52:01 +03:00
parsing_mp - > m_allocsize_log = ffs ( size ) - 1 ;
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_ALLOCSIZE ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_grpid :
case Opt_bsdgroups :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_GRPID ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_nogrpid :
case Opt_sysvgroups :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features & = ~ XFS_FEAT_GRPID ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_wsync :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_WSYNC ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_norecovery :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_NORECOVERY ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_noalign :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_NOALIGN ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_swalloc :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_SWALLOC ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_sunit :
2021-03-22 19:52:01 +03:00
parsing_mp - > m_dalign = result . uint_32 ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_swidth :
2021-03-22 19:52:01 +03:00
parsing_mp - > m_swidth = result . uint_32 ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_inode32 :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_SMALL_INUMS ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_inode64 :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features & = ~ XFS_FEAT_SMALL_INUMS ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_nouuid :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_NOUUID ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_largeio :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_LARGE_IOSIZE ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_nolargeio :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features & = ~ XFS_FEAT_LARGE_IOSIZE ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_filestreams :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_FILESTREAMS ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_noquota :
2021-03-22 19:52:01 +03:00
parsing_mp - > m_qflags & = ~ XFS_ALL_QUOTA_ACCT ;
parsing_mp - > m_qflags & = ~ XFS_ALL_QUOTA_ENFD ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_quota :
case Opt_uquota :
case Opt_usrquota :
2021-08-06 21:05:37 +03:00
parsing_mp - > m_qflags | = ( XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD ) ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_qnoenforce :
case Opt_uqnoenforce :
2021-08-06 21:05:37 +03:00
parsing_mp - > m_qflags | = XFS_UQUOTA_ACCT ;
2021-03-22 19:52:01 +03:00
parsing_mp - > m_qflags & = ~ XFS_UQUOTA_ENFD ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_pquota :
case Opt_prjquota :
2021-08-06 21:05:37 +03:00
parsing_mp - > m_qflags | = ( XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD ) ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_pqnoenforce :
2021-08-06 21:05:37 +03:00
parsing_mp - > m_qflags | = XFS_PQUOTA_ACCT ;
2021-03-22 19:52:01 +03:00
parsing_mp - > m_qflags & = ~ XFS_PQUOTA_ENFD ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_gquota :
case Opt_grpquota :
2021-08-06 21:05:37 +03:00
parsing_mp - > m_qflags | = ( XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD ) ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_gqnoenforce :
2021-08-06 21:05:37 +03:00
parsing_mp - > m_qflags | = XFS_GQUOTA_ACCT ;
2021-03-22 19:52:01 +03:00
parsing_mp - > m_qflags & = ~ XFS_GQUOTA_ENFD ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_discard :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features | = XFS_FEAT_DISCARD ;
2019-11-05 00:58:48 +03:00
return 0 ;
case Opt_nodiscard :
2021-08-19 04:46:52 +03:00
parsing_mp - > m_features & = ~ XFS_FEAT_DISCARD ;
2019-11-05 00:58:48 +03:00
return 0 ;
# ifdef CONFIG_FS_DAX
case Opt_dax :
2021-03-22 19:52:01 +03:00
xfs_mount_set_dax_mode ( parsing_mp , XFS_DAX_ALWAYS ) ;
2020-05-04 19:02:42 +03:00
return 0 ;
case Opt_dax_enum :
2021-03-22 19:52:01 +03:00
xfs_mount_set_dax_mode ( parsing_mp , result . uint_32 ) ;
2019-11-05 00:58:48 +03:00
return 0 ;
# endif
2020-09-25 21:10:29 +03:00
/* Following mount options will be removed in September 2025 */
case Opt_ikeep :
2021-08-19 04:46:52 +03:00
xfs_fs_warn_deprecated ( fc , param , XFS_FEAT_IKEEP , true ) ;
parsing_mp - > m_features | = XFS_FEAT_IKEEP ;
2020-09-25 21:10:29 +03:00
return 0 ;
case Opt_noikeep :
2021-08-19 04:46:52 +03:00
xfs_fs_warn_deprecated ( fc , param , XFS_FEAT_IKEEP , false ) ;
parsing_mp - > m_features & = ~ XFS_FEAT_IKEEP ;
2020-09-25 21:10:29 +03:00
return 0 ;
case Opt_attr2 :
2021-08-19 04:46:52 +03:00
xfs_fs_warn_deprecated ( fc , param , XFS_FEAT_ATTR2 , true ) ;
parsing_mp - > m_features | = XFS_FEAT_ATTR2 ;
2020-09-25 21:10:29 +03:00
return 0 ;
case Opt_noattr2 :
2021-08-19 04:46:52 +03:00
xfs_fs_warn_deprecated ( fc , param , XFS_FEAT_NOATTR2 , true ) ;
parsing_mp - > m_features | = XFS_FEAT_NOATTR2 ;
2020-09-25 21:10:29 +03:00
return 0 ;
2019-11-05 00:58:48 +03:00
default :
2021-03-22 19:52:01 +03:00
xfs_warn ( parsing_mp , " unknown mount option [%s]. " , param - > key ) ;
2019-11-05 00:58:48 +03:00
return - EINVAL ;
}
2010-02-06 01:59:53 +03:00
return 0 ;
}
2019-11-05 00:58:48 +03:00
static int
2020-12-05 02:59:39 +03:00
xfs_fs_validate_params (
2008-05-20 05:30:59 +04:00
struct xfs_mount * mp )
{
2021-08-19 04:46:52 +03:00
/* No recovery flag requires a read-only mount */
2021-08-19 04:46:52 +03:00
if ( xfs_has_norecovery ( mp ) & & ! xfs_is_readonly ( mp ) ) {
2019-11-05 00:58:48 +03:00
xfs_warn ( mp , " no-recovery mounts must be read-only. " ) ;
2014-06-25 08:58:08 +04:00
return - EINVAL ;
2013-06-05 06:09:09 +04:00
}
/*
2021-08-19 04:46:52 +03:00
* We have not read the superblock at this point , so only the attr2
* mount option can set the attr2 feature by this stage .
2013-06-05 06:09:09 +04:00
*/
2021-08-19 04:46:52 +03:00
if ( xfs_has_attr2 ( mp ) & & xfs_has_noattr2 ( mp ) ) {
xfs: rework attr2 feature and mount options
The attr2 feature is somewhat unique in that it has both a superblock
feature bit to enable it and mount options to enable and disable it.
Back when it was first introduced in 2005, attr2 was disabled unless
either the attr2 superblock feature bit was set, or the attr2 mount
option was set. If the superblock feature bit was not set but the
mount option was set, then when the first attr2 format inode fork
was created, it would set the superblock feature bit. This is as it
should be - the superblock feature bit indicated the presence of the
attr2 on disk format.
The noattr2 mount option, however, did not affect the superblock
feature bit. If noattr2 was specified, the on-disk superblock
feature bit was ignored and the code always just created attr1
format inode forks. If neither of the attr2 or noattr2 mounts
option were specified, then the behaviour was determined by the
superblock feature bit.
This was all pretty sane.
Fast foward 3 years, and we are dealing with fallout from the
botched sb_features2 addition and having to deal with feature
mismatches between the sb_features2 and sb_bad_features2 fields. The
attr2 feature bit was one of these flags. The reconciliation was
done well after mount option parsing and, unfortunately, the feature
reconciliation had a bug where it ignored the noattr2 mount option.
For reasons lost to the mists of time, it was decided that resolving
this issue in commit 7c12f296500e ("[XFS] Fix up noattr2 so that it
will properly update the versionnum and features2 fields.") required
noattr2 to clear the superblock attr2 feature bit. This greatly
complicated the attr2 behaviour and broke rules about feature bits
needing to be set when those specific features are present in the
filesystem.
By complicated, I mean that it introduced problems due to feature
bit interactions with log recovery. All of the superblock feature
bit checks are done prior to log recovery, but if we crash after
removing a feature bit, then on the next mount we see the feature
bit in the unrecovered superblock, only to have it go away after the
log has been replayed. This means our mount time feature processing
could be all wrong.
Hence you can mount with noattr2, crash shortly afterwards, and
mount again without attr2 or noattr2 and still have attr2 enabled
because the second mount sees attr2 still enabled in the superblock
before recovery runs and removes the feature bit. It's just a mess.
Further, this is all legacy code as the v5 format requires attr2 to
be enabled at all times and it cannot be disabled. i.e. the noattr2
mount option returns an error when used on v5 format filesystems.
To straighten this all out, this patch reverts the attr2/noattr2
mount option behaviour back to the original behaviour. There is no
reason for disabling attr2 these days, so we will only do this when
the noattr2 mount option is set. This will not remove the superblock
feature bit. The superblock bit will provide the default behaviour
and only track whether attr2 is present on disk or not. The attr2
mount option will enable the creation of attr2 format inode forks,
and if the superblock feature bit is not set it will be added when
the first attr2 inode fork is created.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
2021-08-19 04:46:25 +03:00
xfs_warn ( mp , " attr2 and noattr2 cannot both be specified. " ) ;
2014-06-25 08:58:08 +04:00
return - EINVAL ;
2013-06-05 06:09:09 +04:00
}
xfs: rework attr2 feature and mount options
The attr2 feature is somewhat unique in that it has both a superblock
feature bit to enable it and mount options to enable and disable it.
Back when it was first introduced in 2005, attr2 was disabled unless
either the attr2 superblock feature bit was set, or the attr2 mount
option was set. If the superblock feature bit was not set but the
mount option was set, then when the first attr2 format inode fork
was created, it would set the superblock feature bit. This is as it
should be - the superblock feature bit indicated the presence of the
attr2 on disk format.
The noattr2 mount option, however, did not affect the superblock
feature bit. If noattr2 was specified, the on-disk superblock
feature bit was ignored and the code always just created attr1
format inode forks. If neither of the attr2 or noattr2 mounts
option were specified, then the behaviour was determined by the
superblock feature bit.
This was all pretty sane.
Fast foward 3 years, and we are dealing with fallout from the
botched sb_features2 addition and having to deal with feature
mismatches between the sb_features2 and sb_bad_features2 fields. The
attr2 feature bit was one of these flags. The reconciliation was
done well after mount option parsing and, unfortunately, the feature
reconciliation had a bug where it ignored the noattr2 mount option.
For reasons lost to the mists of time, it was decided that resolving
this issue in commit 7c12f296500e ("[XFS] Fix up noattr2 so that it
will properly update the versionnum and features2 fields.") required
noattr2 to clear the superblock attr2 feature bit. This greatly
complicated the attr2 behaviour and broke rules about feature bits
needing to be set when those specific features are present in the
filesystem.
By complicated, I mean that it introduced problems due to feature
bit interactions with log recovery. All of the superblock feature
bit checks are done prior to log recovery, but if we crash after
removing a feature bit, then on the next mount we see the feature
bit in the unrecovered superblock, only to have it go away after the
log has been replayed. This means our mount time feature processing
could be all wrong.
Hence you can mount with noattr2, crash shortly afterwards, and
mount again without attr2 or noattr2 and still have attr2 enabled
because the second mount sees attr2 still enabled in the superblock
before recovery runs and removes the feature bit. It's just a mess.
Further, this is all legacy code as the v5 format requires attr2 to
be enabled at all times and it cannot be disabled. i.e. the noattr2
mount option returns an error when used on v5 format filesystems.
To straighten this all out, this patch reverts the attr2/noattr2
mount option behaviour back to the original behaviour. There is no
reason for disabling attr2 these days, so we will only do this when
the noattr2 mount option is set. This will not remove the superblock
feature bit. The superblock bit will provide the default behaviour
and only track whether attr2 is present on disk or not. The attr2
mount option will enable the creation of attr2 format inode forks,
and if the superblock feature bit is not set it will be added when
the first attr2 inode fork is created.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
2021-08-19 04:46:25 +03:00
2021-08-19 04:46:52 +03:00
if ( xfs_has_noalign ( mp ) & & ( mp - > m_dalign | | mp - > m_swidth ) ) {
2011-03-07 02:00:35 +03:00
xfs_warn ( mp ,
2019-11-05 00:58:48 +03:00
" sunit and swidth options incompatible with the noalign option " ) ;
return - EINVAL ;
2008-05-20 05:30:59 +04:00
}
2019-11-05 00:58:48 +03:00
if ( ! IS_ENABLED ( CONFIG_XFS_QUOTA ) & & mp - > m_qflags ! = 0 ) {
xfs_warn ( mp , " quota support not available in this kernel. " ) ;
2014-06-25 08:58:08 +04:00
return - EINVAL ;
2013-07-20 02:36:02 +04:00
}
2019-11-05 00:58:48 +03:00
if ( ( mp - > m_dalign & & ! mp - > m_swidth ) | |
( ! mp - > m_dalign & & mp - > m_swidth ) ) {
xfs_warn ( mp , " sunit and swidth must be specified together " ) ;
return - EINVAL ;
}
2015-02-23 13:22:31 +03:00
2019-11-05 00:58:48 +03:00
if ( mp - > m_dalign & & ( mp - > m_swidth % mp - > m_dalign ! = 0 ) ) {
xfs_warn ( mp ,
" stripe width (%d) must be a multiple of the stripe unit (%d) " ,
mp - > m_swidth , mp - > m_dalign ) ;
return - EINVAL ;
}
2015-02-23 13:22:31 +03:00
2019-11-05 00:58:48 +03:00
if ( mp - > m_logbufs ! = - 1 & &
mp - > m_logbufs ! = 0 & &
( mp - > m_logbufs < XLOG_MIN_ICLOGS | |
mp - > m_logbufs > XLOG_MAX_ICLOGS ) ) {
xfs_warn ( mp , " invalid logbufs value: %d [not %d-%d] " ,
mp - > m_logbufs , XLOG_MIN_ICLOGS , XLOG_MAX_ICLOGS ) ;
return - EINVAL ;
}
2015-02-23 13:22:31 +03:00
2019-11-05 00:58:48 +03:00
if ( mp - > m_logbsize ! = - 1 & &
mp - > m_logbsize ! = 0 & &
( mp - > m_logbsize < XLOG_MIN_RECORD_BSIZE | |
mp - > m_logbsize > XLOG_MAX_RECORD_BSIZE | |
! is_power_of_2 ( mp - > m_logbsize ) ) ) {
xfs_warn ( mp ,
" invalid logbufsize: %d [not 16k,32k,64k,128k or 256k] " ,
mp - > m_logbsize ) ;
return - EINVAL ;
}
2015-02-23 13:22:31 +03:00
2021-08-19 04:46:52 +03:00
if ( xfs_has_allocsize ( mp ) & &
2019-11-05 00:58:48 +03:00
( mp - > m_allocsize_log > XFS_MAX_IO_LOG | |
mp - > m_allocsize_log < XFS_MIN_IO_LOG ) ) {
xfs_warn ( mp , " invalid log iosize: %d [not %d-%d] " ,
mp - > m_allocsize_log , XFS_MIN_IO_LOG , XFS_MAX_IO_LOG ) ;
return - EINVAL ;
}
2019-04-26 04:26:22 +03:00
2015-02-23 13:22:31 +03:00
return 0 ;
2018-03-24 03:54:32 +03:00
}
2007-08-30 11:21:22 +04:00
2019-11-05 00:58:46 +03:00
static int
2020-12-05 02:59:39 +03:00
xfs_fs_fill_super (
2018-03-24 03:54:32 +03:00
struct super_block * sb ,
2019-11-05 00:58:46 +03:00
struct fs_context * fc )
2018-03-24 03:54:32 +03:00
{
2019-11-05 00:58:46 +03:00
struct xfs_mount * mp = sb - > s_fs_info ;
2018-03-24 03:54:32 +03:00
struct inode * root ;
2019-11-06 19:07:46 +03:00
int flags = 0 , error ;
2018-03-24 03:54:32 +03:00
2019-11-05 00:58:46 +03:00
mp - > m_super = sb ;
2005-04-17 02:20:36 +04:00
2020-12-05 02:59:39 +03:00
error = xfs_fs_validate_params ( mp ) ;
2007-08-30 11:20:39 +04:00
if ( error )
2019-11-05 00:58:40 +03:00
goto out_free_names ;
2005-04-17 02:20:36 +04:00
sb_min_blocksize ( sb , BBSIZE ) ;
2008-06-23 07:23:01 +04:00
sb - > s_xattr = xfs_xattr_handlers ;
2006-03-14 06:06:18 +03:00
sb - > s_export_op = & xfs_export_operations ;
2009-02-09 10:47:34 +03:00
# ifdef CONFIG_XFS_QUOTA
2006-03-14 06:06:18 +03:00
sb - > s_qcop = & xfs_quotactl_operations ;
2014-10-01 00:35:33 +04:00
sb - > s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ ;
2009-02-09 10:47:34 +03:00
# endif
2006-03-14 06:06:18 +03:00
sb - > s_op = & xfs_super_operations ;
2005-04-17 02:20:36 +04:00
2018-05-11 07:50:23 +03:00
/*
* Delay mount work if the debug hook is set . This is debug
* instrumention to coordinate simulation of xfs mount failures with
* VFS superblock operations
*/
if ( xfs_globals . mount_delay ) {
xfs_notice ( mp , " Delaying mount for %d seconds. " ,
xfs_globals . mount_delay ) ;
msleep ( xfs_globals . mount_delay * 1000 ) ;
}
2019-11-05 00:58:46 +03:00
if ( fc - > sb_flags & SB_SILENT )
2008-05-20 05:30:59 +04:00
flags | = XFS_MFSI_QUIET ;
2008-10-30 09:53:24 +03:00
error = xfs_open_devices ( mp ) ;
2008-05-20 05:31:13 +04:00
if ( error )
2019-11-05 00:58:40 +03:00
goto out_free_names ;
2008-05-20 05:30:59 +04:00
2014-06-25 08:58:08 +04:00
error = xfs_init_mount_workqueues ( mp ) ;
2010-09-30 06:25:54 +04:00
if ( error )
goto out_close_devices ;
2008-05-20 09:10:52 +04:00
2015-02-23 13:22:31 +03:00
error = xfs_init_percpu_counters ( mp ) ;
2012-02-29 13:53:48 +04:00
if ( error )
goto out_destroy_workqueues ;
2021-08-06 21:05:39 +03:00
error = xfs_inodegc_init_percpu ( mp ) ;
if ( error )
goto out_destroy_counters ;
2021-08-06 21:05:38 +03:00
/*
* All percpu data structures requiring cleanup when a cpu goes offline
* must be allocated before adding this @ mp to the cpu - dead handler ' s
* mount list .
*/
xfs_mount_list_add ( mp ) ;
2015-10-12 10:21:19 +03:00
/* Allocate stats memory before we do operations that might use it */
mp - > m_stats . xs_stats = alloc_percpu ( struct xfsstats ) ;
if ( ! mp - > m_stats . xs_stats ) {
2015-10-19 00:42:47 +03:00
error = - ENOMEM ;
2021-08-06 21:05:39 +03:00
goto out_destroy_inodegc ;
2015-10-12 10:21:19 +03:00
}
2008-05-20 05:30:59 +04:00
error = xfs_readsb ( mp , flags ) ;
if ( error )
2015-10-12 10:21:19 +03:00
goto out_free_stats ;
2008-10-30 09:53:24 +03:00
error = xfs_finish_flags ( mp ) ;
2008-05-20 05:30:59 +04:00
if ( error )
2008-05-20 09:11:05 +04:00
goto out_free_sb ;
2008-05-20 05:30:59 +04:00
2008-05-20 09:10:36 +04:00
error = xfs_setup_devices ( mp ) ;
2008-05-20 05:31:13 +04:00
if ( error )
2008-05-20 09:11:05 +04:00
goto out_free_sb ;
2008-05-20 05:30:59 +04:00
2020-09-10 20:57:17 +03:00
/* V4 support is undergoing deprecation. */
2021-08-19 04:46:37 +03:00
if ( ! xfs_has_crc ( mp ) ) {
2020-09-10 20:57:17 +03:00
# ifdef CONFIG_XFS_SUPPORT_V4
xfs_warn_once ( mp ,
" Deprecated V4 format (crc=0) will not be supported after September 2030. " ) ;
# else
xfs_warn ( mp ,
" Deprecated V4 format (crc=0) not supported by kernel. " ) ;
error = - EINVAL ;
goto out_free_sb ;
# endif
}
2020-11-24 22:45:55 +03:00
/* Filesystem claims it needs repair, so refuse the mount. */
2021-08-19 04:46:55 +03:00
if ( xfs_has_needsrepair ( mp ) ) {
2020-11-24 22:45:55 +03:00
xfs_warn ( mp , " Filesystem needs repair. Please run xfs_repair. " ) ;
error = - EFSCORRUPTED ;
goto out_free_sb ;
}
2020-11-24 22:45:54 +03:00
/*
* Don ' t touch the filesystem if a user tool thinks it owns the primary
* superblock . mkfs doesn ' t clear the flag from secondary supers , so
* we don ' t check them at all .
*/
if ( mp - > m_sb . sb_inprogress ) {
xfs_warn ( mp , " Offline file system operation in progress! " ) ;
error = - EFSCORRUPTED ;
goto out_free_sb ;
}
/*
* Until this is fixed only page - sized or smaller data blocks work .
*/
if ( mp - > m_sb . sb_blocksize > PAGE_SIZE ) {
xfs_warn ( mp ,
" File system with blocksize %d bytes. "
" Only pagesize (%ld) or less will currently work. " ,
mp - > m_sb . sb_blocksize , PAGE_SIZE ) ;
error = - ENOSYS ;
goto out_free_sb ;
}
/* Ensure this filesystem fits in the page cache limits */
if ( xfs_sb_validate_fsb_count ( & mp - > m_sb , mp - > m_sb . sb_dblocks ) | |
xfs_sb_validate_fsb_count ( & mp - > m_sb , mp - > m_sb . sb_rblocks ) ) {
xfs_warn ( mp ,
" file system too large to be mounted on this system. " ) ;
error = - EFBIG ;
goto out_free_sb ;
}
2020-01-03 00:20:13 +03:00
/*
* XFS block mappings use 54 bits to store the logical block offset .
* This should suffice to handle the maximum file size that the VFS
* supports ( currently 2 ^ 63 bytes on 64 - bit and ULONG_MAX < < PAGE_SHIFT
* bytes on 32 - bit ) , but as XFS and VFS have gotten the s_maxbytes
* calculation wrong on 32 - bit kernels in the past , we ' ll add a WARN_ON
* to check this assertion .
*
* Avoid integer overflow by comparing the maximum bmbt offset to the
* maximum pagecache offset in units of fs blocks .
*/
2020-12-05 00:28:35 +03:00
if ( ! xfs_verify_fileoff ( mp , XFS_B_TO_FSBT ( mp , MAX_LFS_FILESIZE ) ) ) {
2020-01-03 00:20:13 +03:00
xfs_warn ( mp ,
" MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)! " ,
XFS_B_TO_FSBT ( mp , MAX_LFS_FILESIZE ) ,
XFS_MAX_FILEOFF ) ;
error = - EINVAL ;
goto out_free_sb ;
}
2008-05-20 05:30:59 +04:00
error = xfs_filestream_mount ( mp ) ;
if ( error )
2008-05-20 09:11:05 +04:00
goto out_free_sb ;
2008-05-20 05:30:59 +04:00
2011-03-26 01:14:57 +03:00
/*
* we must configure the block size in the superblock before we run the
* full mount process as the mount process can lookup and cache inodes .
*/
2018-10-18 09:20:19 +03:00
sb - > s_magic = XFS_SUPER_MAGIC ;
2007-10-11 12:09:40 +04:00
sb - > s_blocksize = mp - > m_sb . sb_blocksize ;
sb - > s_blocksize_bits = ffs ( sb - > s_blocksize ) - 1 ;
2020-01-03 00:20:13 +03:00
sb - > s_maxbytes = MAX_LFS_FILESIZE ;
2012-02-06 21:45:27 +04:00
sb - > s_max_links = XFS_MAXLINK ;
2005-04-17 02:20:36 +04:00
sb - > s_time_gran = 1 ;
2021-08-19 04:46:37 +03:00
if ( xfs_has_bigtime ( mp ) ) {
2020-08-17 19:59:07 +03:00
sb - > s_time_min = xfs_bigtime_to_unix ( XFS_BIGTIME_TIME_MIN ) ;
sb - > s_time_max = xfs_bigtime_to_unix ( XFS_BIGTIME_TIME_MAX ) ;
} else {
sb - > s_time_min = XFS_LEGACY_TIME_MIN ;
sb - > s_time_max = XFS_LEGACY_TIME_MAX ;
}
2020-08-24 21:58:01 +03:00
trace_xfs_inode_timestamp_range ( mp , sb - > s_time_min , sb - > s_time_max ) ;
2019-06-29 05:30:22 +03:00
sb - > s_iflags | = SB_I_CGROUPWB ;
2005-04-17 02:20:36 +04:00
set_posix_acl_flag ( sb ) ;
2013-06-27 10:04:59 +04:00
/* version 5 superblocks support inode version counters. */
2021-08-19 04:46:56 +03:00
if ( xfs_has_crc ( mp ) )
2017-10-18 23:56:26 +03:00
sb - > s_flags | = SB_I_VERSION ;
2013-06-27 10:04:59 +04:00
2021-08-19 04:46:52 +03:00
if ( xfs_has_dax_always ( mp ) ) {
2021-11-29 13:21:41 +03:00
error = xfs_setup_dax_always ( mp ) ;
if ( error )
2018-02-01 01:21:56 +03:00
goto out_filestream_unmount ;
2015-06-04 02:19:18 +03:00
}
2022-04-15 07:52:55 +03:00
if ( xfs_has_discard ( mp ) & & ! bdev_max_discard_sectors ( sb - > s_bdev ) ) {
xfs_warn ( mp ,
" mounting with \" discard \" option, but the device does not support discard " ) ;
mp - > m_features & = ~ XFS_FEAT_DISCARD ;
2017-09-18 22:03:56 +03:00
}
2021-08-19 04:46:37 +03:00
if ( xfs_has_reflink ( mp ) ) {
2019-02-18 20:38:49 +03:00
if ( mp - > m_sb . sb_rblocks ) {
xfs_alert ( mp ,
2018-02-01 03:38:18 +03:00
" reflink not compatible with realtime device! " ) ;
2019-02-18 20:38:49 +03:00
error = - EINVAL ;
goto out_filestream_unmount ;
}
if ( xfs_globals . always_cow ) {
xfs_info ( mp , " using DEBUG-only always_cow mode. " ) ;
mp - > m_always_cow = true ;
}
2018-02-01 03:38:18 +03:00
}
2021-08-19 04:46:37 +03:00
if ( xfs_has_rmapbt ( mp ) & & mp - > m_sb . sb_rblocks ) {
2016-08-03 05:20:57 +03:00
xfs_alert ( mp ,
2018-01-31 20:47:25 +03:00
" reverse mapping btree not compatible with realtime device! " ) ;
error = - EINVAL ;
goto out_filestream_unmount ;
2016-08-26 08:59:19 +03:00
}
2016-08-03 05:20:57 +03:00
2021-08-11 08:03:20 +03:00
if ( xfs_has_large_extent_counts ( mp ) )
xfs_warn ( mp ,
" EXPERIMENTAL Large extent counts feature in use. Use at your own risk! " ) ;
2012-04-13 16:10:44 +04:00
error = xfs_mountfs ( mp ) ;
2011-07-13 15:43:48 +04:00
if ( error )
2012-10-08 14:56:00 +04:00
goto out_filestream_unmount ;
2011-03-26 01:14:57 +03:00
2008-08-13 09:45:15 +04:00
root = igrab ( VFS_I ( mp - > m_rootip ) ) ;
2008-03-27 10:00:54 +03:00
if ( ! root ) {
2014-06-25 08:58:08 +04:00
error = - ENOENT ;
2012-04-13 16:10:44 +04:00
goto out_unmount ;
2008-02-05 04:14:01 +03:00
}
2012-01-09 07:15:13 +04:00
sb - > s_root = d_make_root ( root ) ;
2008-03-27 10:00:54 +03:00
if ( ! sb - > s_root ) {
2014-06-25 08:58:08 +04:00
error = - ENOMEM ;
2012-04-13 16:10:44 +04:00
goto out_unmount ;
2005-04-17 02:20:36 +04:00
}
2007-08-30 11:21:22 +04:00
2005-04-17 02:20:36 +04:00
return 0 ;
2012-10-08 14:55:59 +04:00
2012-10-08 14:56:00 +04:00
out_filestream_unmount :
2008-05-20 09:11:11 +04:00
xfs_filestream_unmount ( mp ) ;
2008-05-20 09:11:05 +04:00
out_free_sb :
xfs_freesb ( mp ) ;
2015-10-12 10:21:19 +03:00
out_free_stats :
free_percpu ( mp - > m_stats . xs_stats ) ;
2021-08-06 21:05:39 +03:00
out_destroy_inodegc :
2021-08-06 21:05:38 +03:00
xfs_mount_list_del ( mp ) ;
2021-08-06 21:05:39 +03:00
xfs_inodegc_free_percpu ( mp ) ;
2008-10-30 09:53:24 +03:00
out_destroy_counters :
2015-02-23 13:22:31 +03:00
xfs_destroy_percpu_counters ( mp ) ;
2015-10-12 10:21:19 +03:00
out_destroy_workqueues :
2012-02-29 13:53:48 +04:00
xfs_destroy_mount_workqueues ( mp ) ;
2010-09-30 06:25:54 +04:00
out_close_devices :
2008-05-20 05:31:13 +04:00
xfs_close_devices ( mp ) ;
2019-11-05 00:58:40 +03:00
out_free_names :
2018-05-11 07:50:23 +03:00
sb - > s_fs_info = NULL ;
2019-11-05 00:58:42 +03:00
xfs_mount_free ( mp ) ;
2014-06-25 08:58:08 +04:00
return error ;
2008-05-20 05:30:59 +04:00
2011-07-13 15:43:48 +04:00
out_unmount :
2008-05-20 05:30:52 +04:00
xfs_filestream_unmount ( mp ) ;
2008-05-20 05:31:13 +04:00
xfs_unmountfs ( mp ) ;
2008-08-13 10:50:21 +04:00
goto out_free_sb ;
2005-04-17 02:20:36 +04:00
}
2019-11-05 00:58:46 +03:00
static int
2020-12-05 02:59:39 +03:00
xfs_fs_get_tree (
2019-11-05 00:58:46 +03:00
struct fs_context * fc )
{
2020-12-05 02:59:39 +03:00
return get_tree_bdev ( fc , xfs_fs_fill_super ) ;
2019-11-05 00:58:46 +03:00
}
2019-11-05 00:58:47 +03:00
static int
xfs_remount_rw (
struct xfs_mount * mp )
{
struct xfs_sb * sbp = & mp - > m_sb ;
int error ;
2021-08-19 04:46:52 +03:00
if ( xfs_has_norecovery ( mp ) ) {
2019-11-05 00:58:47 +03:00
xfs_warn ( mp ,
" ro->rw transition prohibited on norecovery mount " ) ;
return - EINVAL ;
}
2021-08-19 04:46:56 +03:00
if ( xfs_sb_is_v5 ( sbp ) & &
2019-11-05 00:58:47 +03:00
xfs_sb_has_ro_compat_feature ( sbp , XFS_SB_FEAT_RO_COMPAT_UNKNOWN ) ) {
xfs_warn ( mp ,
" ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem " ,
( sbp - > sb_features_ro_compat &
XFS_SB_FEAT_RO_COMPAT_UNKNOWN ) ) ;
return - EINVAL ;
}
2021-08-19 04:46:52 +03:00
clear_bit ( XFS_OPSTATE_READONLY , & mp - > m_opstate ) ;
2019-11-05 00:58:47 +03:00
/*
* If this is the first remount to writeable state we might have some
* superblock changes to update .
*/
if ( mp - > m_update_sb ) {
error = xfs_sync_sb ( mp , false ) ;
if ( error ) {
xfs_warn ( mp , " failed to write sb changes " ) ;
return error ;
}
mp - > m_update_sb = false ;
}
/*
* Fill out the reserve pool if it is empty . Use the stashed value if
* it is non - zero , otherwise go with the default .
*/
xfs_restore_resvblks ( mp ) ;
xfs_log_work_queue ( mp ) ;
2021-01-23 03:48:44 +03:00
xfs_blockgc_start ( mp ) ;
2019-11-05 00:58:47 +03:00
/* Create the per-AG metadata reservation pool .*/
error = xfs_fs_reserve_ag_blocks ( mp ) ;
if ( error & & error ! = - ENOSPC )
return error ;
2021-08-06 21:05:39 +03:00
/* Re-enable the background inode inactivation worker. */
xfs_inodegc_start ( mp ) ;
2019-11-05 00:58:47 +03:00
return 0 ;
}
static int
xfs_remount_ro (
struct xfs_mount * mp )
{
xfs: remove all COW fork extents when remounting readonly
As part of multiple customer escalations due to file data corruption
after copy on write operations, I wrote some fstests that use fsstress
to hammer on COW to shake things loose. Regrettably, I caught some
filesystem shutdowns due to incorrect rmap operations with the following
loop:
mount <filesystem> # (0)
fsstress <run only readonly ops> & # (1)
while true; do
fsstress <run all ops>
mount -o remount,ro # (2)
fsstress <run only readonly ops>
mount -o remount,rw # (3)
done
When (2) happens, notice that (1) is still running. xfs_remount_ro will
call xfs_blockgc_stop to walk the inode cache to free all the COW
extents, but the blockgc mechanism races with (1)'s reader threads to
take IOLOCKs and loses, which means that it doesn't clean them all out.
Call such a file (A).
When (3) happens, xfs_remount_rw calls xfs_reflink_recover_cow, which
walks the ondisk refcount btree and frees any COW extent that it finds.
This function does not check the inode cache, which means that incore
COW forks of inode (A) is now inconsistent with the ondisk metadata. If
one of those former COW extents are allocated and mapped into another
file (B) and someone triggers a COW to the stale reservation in (A), A's
dirty data will be written into (B) and once that's done, those blocks
will be transferred to (A)'s data fork without bumping the refcount.
The results are catastrophic -- file (B) and the refcount btree are now
corrupt. Solve this race by forcing the xfs_blockgc_free_space to run
synchronously, which causes xfs_icwalk to return to inodes that were
skipped because the blockgc code couldn't take the IOLOCK. This is safe
to do here because the VFS has already prohibited new writer threads.
Fixes: 10ddf64e420f ("xfs: remove leftover CoW reservations when remounting ro")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
2021-12-07 02:38:20 +03:00
struct xfs_icwalk icw = {
. icw_flags = XFS_ICWALK_FLAG_SYNC ,
} ;
int error ;
2019-11-05 00:58:47 +03:00
2022-02-03 19:29:21 +03:00
/* Flush all the dirty data to disk. */
error = sync_filesystem ( mp - > m_super ) ;
if ( error )
return error ;
2019-11-05 00:58:47 +03:00
/*
* Cancel background eofb scanning so it cannot race with the final
* log force + buftarg wait and deadlock the remount .
*/
2021-01-23 03:48:44 +03:00
xfs_blockgc_stop ( mp ) ;
2019-11-05 00:58:47 +03:00
xfs: remove all COW fork extents when remounting readonly
As part of multiple customer escalations due to file data corruption
after copy on write operations, I wrote some fstests that use fsstress
to hammer on COW to shake things loose. Regrettably, I caught some
filesystem shutdowns due to incorrect rmap operations with the following
loop:
mount <filesystem> # (0)
fsstress <run only readonly ops> & # (1)
while true; do
fsstress <run all ops>
mount -o remount,ro # (2)
fsstress <run only readonly ops>
mount -o remount,rw # (3)
done
When (2) happens, notice that (1) is still running. xfs_remount_ro will
call xfs_blockgc_stop to walk the inode cache to free all the COW
extents, but the blockgc mechanism races with (1)'s reader threads to
take IOLOCKs and loses, which means that it doesn't clean them all out.
Call such a file (A).
When (3) happens, xfs_remount_rw calls xfs_reflink_recover_cow, which
walks the ondisk refcount btree and frees any COW extent that it finds.
This function does not check the inode cache, which means that incore
COW forks of inode (A) is now inconsistent with the ondisk metadata. If
one of those former COW extents are allocated and mapped into another
file (B) and someone triggers a COW to the stale reservation in (A), A's
dirty data will be written into (B) and once that's done, those blocks
will be transferred to (A)'s data fork without bumping the refcount.
The results are catastrophic -- file (B) and the refcount btree are now
corrupt. Solve this race by forcing the xfs_blockgc_free_space to run
synchronously, which causes xfs_icwalk to return to inodes that were
skipped because the blockgc code couldn't take the IOLOCK. This is safe
to do here because the VFS has already prohibited new writer threads.
Fixes: 10ddf64e420f ("xfs: remove leftover CoW reservations when remounting ro")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Chandan Babu R <chandan.babu@oracle.com>
2021-12-07 02:38:20 +03:00
/*
* Clear out all remaining COW staging extents and speculative post - EOF
* preallocations so that we don ' t leave inodes requiring inactivation
* cleanups during reclaim on a read - only mount . We must process every
* cached inode , so this requires a synchronous cache scan .
*/
error = xfs_blockgc_free_space ( mp , & icw ) ;
2019-11-05 00:58:47 +03:00
if ( error ) {
xfs_force_shutdown ( mp , SHUTDOWN_CORRUPT_INCORE ) ;
return error ;
}
2021-08-06 21:05:39 +03:00
/*
* Stop the inodegc background worker . xfs_fs_reconfigure already
* flushed all pending inodegc work when it sync ' d the filesystem .
* The VFS holds s_umount , so we know that inodes cannot enter
* xfs_fs_destroy_inode during a remount operation . In readonly mode
* we send inodes straight to reclaim , so no inodes will be queued .
*/
xfs_inodegc_stop ( mp ) ;
2019-11-05 00:58:47 +03:00
/* Free the per-AG metadata reservation pool. */
error = xfs_fs_unreserve_ag_blocks ( mp ) ;
if ( error ) {
xfs_force_shutdown ( mp , SHUTDOWN_CORRUPT_INCORE ) ;
return error ;
}
/*
* Before we sync the metadata , we need to free up the reserve block
* pool so that the used block count in the superblock on disk is
* correct at the end of the remount . Stash the current * reserve pool
* size so that if we get remounted rw , we can return it to the same
* size .
*/
xfs_save_resvblks ( mp ) ;
2021-01-23 03:48:24 +03:00
xfs_log_clean ( mp ) ;
2021-08-19 04:46:52 +03:00
set_bit ( XFS_OPSTATE_READONLY , & mp - > m_opstate ) ;
2019-11-05 00:58:47 +03:00
return 0 ;
}
/*
* Logically we would return an error here to prevent users from believing
* they might have changed mount options using remount which can ' t be changed .
*
* But unfortunately mount ( 8 ) adds all options from mtab and fstab to the mount
* arguments in some cases so we can ' t blindly reject options , but have to
* check for each specified option if it actually differs from the currently
* set option and only reject it if that ' s the case .
*
* Until that is implemented we return success for every remount request , and
* silently ignore all options that we can ' t actually change .
*/
static int
2020-12-05 02:59:39 +03:00
xfs_fs_reconfigure (
2019-11-05 00:58:47 +03:00
struct fs_context * fc )
{
struct xfs_mount * mp = XFS_M ( fc - > root - > d_sb ) ;
struct xfs_mount * new_mp = fc - > s_fs_info ;
int flags = fc - > sb_flags ;
int error ;
2020-07-15 18:30:37 +03:00
/* version 5 superblocks always support version counters. */
2021-08-19 04:46:56 +03:00
if ( xfs_has_crc ( mp ) )
2020-07-15 18:30:37 +03:00
fc - > sb_flags | = SB_I_VERSION ;
2020-12-05 02:59:39 +03:00
error = xfs_fs_validate_params ( new_mp ) ;
2019-11-05 00:58:47 +03:00
if ( error )
return error ;
/* inode32 -> inode64 */
2021-08-19 04:46:52 +03:00
if ( xfs_has_small_inums ( mp ) & & ! xfs_has_small_inums ( new_mp ) ) {
mp - > m_features & = ~ XFS_FEAT_SMALL_INUMS ;
2021-08-19 04:46:56 +03:00
mp - > m_maxagi = xfs_set_inode_alloc ( mp , mp - > m_sb . sb_agcount ) ;
2019-11-05 00:58:47 +03:00
}
/* inode64 -> inode32 */
2021-08-19 04:46:52 +03:00
if ( ! xfs_has_small_inums ( mp ) & & xfs_has_small_inums ( new_mp ) ) {
mp - > m_features | = XFS_FEAT_SMALL_INUMS ;
2021-08-19 04:46:56 +03:00
mp - > m_maxagi = xfs_set_inode_alloc ( mp , mp - > m_sb . sb_agcount ) ;
2019-11-05 00:58:47 +03:00
}
/* ro -> rw */
2021-08-19 04:46:52 +03:00
if ( xfs_is_readonly ( mp ) & & ! ( flags & SB_RDONLY ) ) {
2019-11-05 00:58:47 +03:00
error = xfs_remount_rw ( mp ) ;
if ( error )
return error ;
}
/* rw -> ro */
2021-08-19 04:46:52 +03:00
if ( ! xfs_is_readonly ( mp ) & & ( flags & SB_RDONLY ) ) {
2019-11-05 00:58:47 +03:00
error = xfs_remount_ro ( mp ) ;
if ( error )
return error ;
}
return 0 ;
}
2020-12-05 02:59:39 +03:00
static void xfs_fs_free (
2019-11-05 00:58:46 +03:00
struct fs_context * fc )
{
struct xfs_mount * mp = fc - > s_fs_info ;
/*
* mp is stored in the fs_context when it is initialized .
* mp is transferred to the superblock on a successful mount ,
* but if an error occurs before the transfer we have to free
* it here .
*/
if ( mp )
xfs_mount_free ( mp ) ;
}
static const struct fs_context_operations xfs_context_ops = {
2020-12-05 02:59:39 +03:00
. parse_param = xfs_fs_parse_param ,
. get_tree = xfs_fs_get_tree ,
. reconfigure = xfs_fs_reconfigure ,
. free = xfs_fs_free ,
2019-11-05 00:58:46 +03:00
} ;
static int xfs_init_fs_context (
struct fs_context * fc )
{
struct xfs_mount * mp ;
2019-11-05 00:58:48 +03:00
mp = kmem_alloc ( sizeof ( struct xfs_mount ) , KM_ZERO ) ;
2019-11-05 00:58:46 +03:00
if ( ! mp )
return - ENOMEM ;
2019-11-05 00:58:48 +03:00
spin_lock_init ( & mp - > m_sb_lock ) ;
INIT_RADIX_TREE ( & mp - > m_perag_tree , GFP_ATOMIC ) ;
spin_lock_init ( & mp - > m_perag_lock ) ;
mutex_init ( & mp - > m_growlock ) ;
2020-04-12 23:11:10 +03:00
INIT_WORK ( & mp - > m_flush_inodes_work , xfs_flush_inodes_worker ) ;
2019-11-05 00:58:48 +03:00
INIT_DELAYED_WORK ( & mp - > m_reclaim_work , xfs_reclaim_worker ) ;
mp - > m_kobj . kobject . kset = xfs_kset ;
/*
* We don ' t create the finobt per - ag space reservation until after log
* recovery , so we must set this to true so that an ifree transaction
* started during log recovery will not depend on space reservations
* for finobt expansion .
*/
mp - > m_finobt_nores = true ;
2019-11-05 00:58:46 +03:00
/*
* These can be overridden by the mount option parsing .
*/
mp - > m_logbufs = - 1 ;
mp - > m_logbsize = - 1 ;
mp - > m_allocsize_log = 16 ; /* 64k */
/*
* Copy binary VFS mount flags we are interested in .
*/
if ( fc - > sb_flags & SB_RDONLY )
2021-08-19 04:46:52 +03:00
set_bit ( XFS_OPSTATE_READONLY , & mp - > m_opstate ) ;
2019-11-05 00:58:46 +03:00
if ( fc - > sb_flags & SB_DIRSYNC )
2021-08-19 04:46:52 +03:00
mp - > m_features | = XFS_FEAT_DIRSYNC ;
2019-11-05 00:58:46 +03:00
if ( fc - > sb_flags & SB_SYNCHRONOUS )
2021-08-19 04:46:52 +03:00
mp - > m_features | = XFS_FEAT_WSYNC ;
2019-11-05 00:58:46 +03:00
fc - > s_fs_info = mp ;
fc - > ops = & xfs_context_ops ;
return 0 ;
}
2007-02-21 00:57:47 +03:00
static struct file_system_type xfs_fs_type = {
2005-04-17 02:20:36 +04:00
. owner = THIS_MODULE ,
. name = " xfs " ,
2019-11-05 00:58:46 +03:00
. init_fs_context = xfs_init_fs_context ,
2019-09-07 14:23:15 +03:00
. parameters = xfs_fs_parameters ,
2005-04-17 02:20:36 +04:00
. kill_sb = kill_block_super ,
2021-01-21 16:19:58 +03:00
. fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP ,
2005-04-17 02:20:36 +04:00
} ;
2013-03-03 07:39:14 +04:00
MODULE_ALIAS_FS ( " xfs " ) ;
2005-04-17 02:20:36 +04:00
2008-07-18 11:11:46 +04:00
STATIC int __init
2021-10-12 21:09:23 +03:00
xfs_init_caches ( void )
2008-07-18 11:11:46 +04:00
{
2021-09-23 22:21:37 +03:00
int error ;
2022-07-19 04:20:37 +03:00
xfs_buf_cache = kmem_cache_create ( " xfs_buf " , sizeof ( struct xfs_buf ) , 0 ,
SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD ,
NULL ) ;
if ( ! xfs_buf_cache )
goto out ;
2021-10-12 21:09:23 +03:00
xfs_log_ticket_cache = kmem_cache_create ( " xfs_log_ticket " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xlog_ticket ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_log_ticket_cache )
2022-07-19 04:20:37 +03:00
goto out_destroy_buf_cache ;
2008-07-18 11:11:46 +04:00
2021-09-23 22:21:37 +03:00
error = xfs_btree_init_cur_caches ( ) ;
if ( error )
2021-10-13 00:17:01 +03:00
goto out_destroy_log_ticket_cache ;
2008-07-18 11:11:46 +04:00
2021-10-13 00:11:01 +03:00
error = xfs_defer_init_item_caches ( ) ;
if ( error )
goto out_destroy_btree_cur_cache ;
2021-10-12 21:09:23 +03:00
xfs_da_state_cache = kmem_cache_create ( " xfs_da_state " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_da_state ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_da_state_cache )
2021-10-13 00:11:01 +03:00
goto out_destroy_defer_item_cache ;
2008-07-18 11:11:46 +04:00
2021-10-12 21:09:23 +03:00
xfs_ifork_cache = kmem_cache_create ( " xfs_ifork " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_ifork ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_ifork_cache )
goto out_destroy_da_state_cache ;
2008-07-18 11:11:46 +04:00
2021-10-12 21:09:23 +03:00
xfs_trans_cache = kmem_cache_create ( " xfs_trans " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_trans ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_trans_cache )
goto out_destroy_ifork_cache ;
2008-07-18 11:11:46 +04:00
2010-06-23 12:11:15 +04:00
2008-07-18 11:11:46 +04:00
/*
2021-10-12 21:09:23 +03:00
* The size of the cache - allocated buf log item is the maximum
2008-07-18 11:11:46 +04:00
* size possible under XFS . This wastes a little bit of memory ,
* but it is much faster .
*/
2021-10-12 21:09:23 +03:00
xfs_buf_item_cache = kmem_cache_create ( " xfs_buf_item " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_buf_log_item ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_buf_item_cache )
goto out_destroy_trans_cache ;
2008-07-18 11:11:46 +04:00
2021-10-12 21:09:23 +03:00
xfs_efd_cache = kmem_cache_create ( " xfs_efd_item " ,
2022-10-21 19:10:05 +03:00
xfs_efd_log_item_sizeof ( XFS_EFD_MAX_FAST_EXTENTS ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_efd_cache )
goto out_destroy_buf_item_cache ;
2008-07-18 11:11:46 +04:00
2021-10-12 21:09:23 +03:00
xfs_efi_cache = kmem_cache_create ( " xfs_efi_item " ,
2022-10-21 19:10:05 +03:00
xfs_efi_log_item_sizeof ( XFS_EFI_MAX_FAST_EXTENTS ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_efi_cache )
goto out_destroy_efd_cache ;
2008-07-18 11:11:46 +04:00
2021-10-12 21:09:23 +03:00
xfs_inode_cache = kmem_cache_create ( " xfs_inode " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_inode ) , 0 ,
( SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD | SLAB_ACCOUNT ) ,
xfs_fs_inode_init_once ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_inode_cache )
goto out_destroy_efi_cache ;
2008-07-18 11:11:46 +04:00
2021-10-12 21:09:23 +03:00
xfs_ili_cache = kmem_cache_create ( " xfs_ili " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_inode_log_item ) , 0 ,
2020-03-25 06:10:28 +03:00
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD ,
NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_ili_cache )
goto out_destroy_inode_cache ;
2019-11-14 23:43:03 +03:00
2021-10-12 21:09:23 +03:00
xfs_icreate_cache = kmem_cache_create ( " xfs_icr " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_icreate_item ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_icreate_cache )
goto out_destroy_ili_cache ;
2008-07-18 11:11:46 +04:00
2021-10-12 21:09:23 +03:00
xfs_rud_cache = kmem_cache_create ( " xfs_rud_item " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_rud_log_item ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_rud_cache )
goto out_destroy_icreate_cache ;
2016-08-03 05:04:45 +03:00
2021-10-12 21:09:23 +03:00
xfs_rui_cache = kmem_cache_create ( " xfs_rui_item " ,
2016-09-19 03:24:27 +03:00
xfs_rui_log_item_sizeof ( XFS_RUI_MAX_FAST_EXTENTS ) ,
2019-11-14 23:43:03 +03:00
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_rui_cache )
goto out_destroy_rud_cache ;
2016-08-03 05:04:45 +03:00
2021-10-12 21:09:23 +03:00
xfs_cud_cache = kmem_cache_create ( " xfs_cud_item " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_cud_log_item ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_cud_cache )
goto out_destroy_rui_cache ;
2016-10-03 19:11:20 +03:00
2021-10-12 21:09:23 +03:00
xfs_cui_cache = kmem_cache_create ( " xfs_cui_item " ,
2016-10-03 19:11:20 +03:00
xfs_cui_log_item_sizeof ( XFS_CUI_MAX_FAST_EXTENTS ) ,
2019-11-14 23:43:03 +03:00
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_cui_cache )
goto out_destroy_cud_cache ;
2016-10-03 19:11:20 +03:00
2021-10-12 21:09:23 +03:00
xfs_bud_cache = kmem_cache_create ( " xfs_bud_item " ,
2019-11-14 23:43:03 +03:00
sizeof ( struct xfs_bud_log_item ) ,
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_bud_cache )
goto out_destroy_cui_cache ;
2016-10-03 19:11:25 +03:00
2021-10-12 21:09:23 +03:00
xfs_bui_cache = kmem_cache_create ( " xfs_bui_item " ,
2016-10-03 19:11:25 +03:00
xfs_bui_log_item_sizeof ( XFS_BUI_MAX_FAST_EXTENTS ) ,
2019-11-14 23:43:03 +03:00
0 , 0 , NULL ) ;
2021-10-12 21:09:23 +03:00
if ( ! xfs_bui_cache )
goto out_destroy_bud_cache ;
2016-10-03 19:11:25 +03:00
2022-05-22 08:59:48 +03:00
xfs_attrd_cache = kmem_cache_create ( " xfs_attrd_item " ,
sizeof ( struct xfs_attrd_log_item ) ,
0 , 0 , NULL ) ;
if ( ! xfs_attrd_cache )
goto out_destroy_bui_cache ;
xfs_attri_cache = kmem_cache_create ( " xfs_attri_item " ,
sizeof ( struct xfs_attri_log_item ) ,
0 , 0 , NULL ) ;
if ( ! xfs_attri_cache )
goto out_destroy_attrd_cache ;
2022-07-14 04:47:42 +03:00
xfs_iunlink_cache = kmem_cache_create ( " xfs_iul_item " ,
sizeof ( struct xfs_iunlink_item ) ,
0 , 0 , NULL ) ;
if ( ! xfs_iunlink_cache )
goto out_destroy_attri_cache ;
2008-07-18 11:11:46 +04:00
return 0 ;
2022-07-14 04:47:42 +03:00
out_destroy_attri_cache :
kmem_cache_destroy ( xfs_attri_cache ) ;
2022-05-22 08:59:48 +03:00
out_destroy_attrd_cache :
kmem_cache_destroy ( xfs_attrd_cache ) ;
out_destroy_bui_cache :
kmem_cache_destroy ( xfs_bui_cache ) ;
2021-10-12 21:09:23 +03:00
out_destroy_bud_cache :
kmem_cache_destroy ( xfs_bud_cache ) ;
out_destroy_cui_cache :
kmem_cache_destroy ( xfs_cui_cache ) ;
out_destroy_cud_cache :
kmem_cache_destroy ( xfs_cud_cache ) ;
out_destroy_rui_cache :
kmem_cache_destroy ( xfs_rui_cache ) ;
out_destroy_rud_cache :
kmem_cache_destroy ( xfs_rud_cache ) ;
out_destroy_icreate_cache :
kmem_cache_destroy ( xfs_icreate_cache ) ;
out_destroy_ili_cache :
kmem_cache_destroy ( xfs_ili_cache ) ;
out_destroy_inode_cache :
kmem_cache_destroy ( xfs_inode_cache ) ;
out_destroy_efi_cache :
kmem_cache_destroy ( xfs_efi_cache ) ;
out_destroy_efd_cache :
kmem_cache_destroy ( xfs_efd_cache ) ;
out_destroy_buf_item_cache :
kmem_cache_destroy ( xfs_buf_item_cache ) ;
out_destroy_trans_cache :
kmem_cache_destroy ( xfs_trans_cache ) ;
out_destroy_ifork_cache :
kmem_cache_destroy ( xfs_ifork_cache ) ;
out_destroy_da_state_cache :
kmem_cache_destroy ( xfs_da_state_cache ) ;
2021-10-13 00:11:01 +03:00
out_destroy_defer_item_cache :
xfs_defer_destroy_item_caches ( ) ;
2021-10-12 21:09:23 +03:00
out_destroy_btree_cur_cache :
2021-09-23 22:21:37 +03:00
xfs_btree_destroy_cur_caches ( ) ;
2021-10-12 21:09:23 +03:00
out_destroy_log_ticket_cache :
kmem_cache_destroy ( xfs_log_ticket_cache ) ;
2022-07-19 04:20:37 +03:00
out_destroy_buf_cache :
kmem_cache_destroy ( xfs_buf_cache ) ;
2008-07-18 11:11:46 +04:00
out :
return - ENOMEM ;
}
STATIC void
2021-10-12 21:09:23 +03:00
xfs_destroy_caches ( void )
2008-07-18 11:11:46 +04:00
{
2012-09-26 05:33:07 +04:00
/*
* Make sure all delayed rcu free are flushed before we
* destroy caches .
*/
rcu_barrier ( ) ;
2022-07-14 04:47:42 +03:00
kmem_cache_destroy ( xfs_iunlink_cache ) ;
2022-05-22 08:59:48 +03:00
kmem_cache_destroy ( xfs_attri_cache ) ;
kmem_cache_destroy ( xfs_attrd_cache ) ;
2021-10-12 21:09:23 +03:00
kmem_cache_destroy ( xfs_bui_cache ) ;
kmem_cache_destroy ( xfs_bud_cache ) ;
kmem_cache_destroy ( xfs_cui_cache ) ;
kmem_cache_destroy ( xfs_cud_cache ) ;
kmem_cache_destroy ( xfs_rui_cache ) ;
kmem_cache_destroy ( xfs_rud_cache ) ;
kmem_cache_destroy ( xfs_icreate_cache ) ;
kmem_cache_destroy ( xfs_ili_cache ) ;
kmem_cache_destroy ( xfs_inode_cache ) ;
kmem_cache_destroy ( xfs_efi_cache ) ;
kmem_cache_destroy ( xfs_efd_cache ) ;
kmem_cache_destroy ( xfs_buf_item_cache ) ;
kmem_cache_destroy ( xfs_trans_cache ) ;
kmem_cache_destroy ( xfs_ifork_cache ) ;
kmem_cache_destroy ( xfs_da_state_cache ) ;
2021-10-13 00:11:01 +03:00
xfs_defer_destroy_item_caches ( ) ;
2021-09-23 22:21:37 +03:00
xfs_btree_destroy_cur_caches ( ) ;
2021-10-12 21:09:23 +03:00
kmem_cache_destroy ( xfs_log_ticket_cache ) ;
2022-07-19 04:20:37 +03:00
kmem_cache_destroy ( xfs_buf_cache ) ;
2008-07-18 11:11:46 +04:00
}
2005-04-17 02:20:36 +04:00
2011-04-08 06:45:07 +04:00
/*
 * Allocate the two global XFS workqueues (xfs_alloc_wq, xfs_discard_wq).
 *
 * Returns 0 on success or -ENOMEM; on failure no workqueue is left
 * allocated.
 */
STATIC int __init
xfs_init_workqueues(void)
{
	/*
	 * The allocation workqueue can be used in memory reclaim situations
	 * (writepage path), and parallelism is only limited by the number of
	 * AGs in all the filesystems mounted.  Hence use the default large
	 * max_active value for this workqueue.
	 */
	xfs_alloc_wq = alloc_workqueue("xfsalloc",
			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
	if (!xfs_alloc_wq)
		return -ENOMEM;

	xfs_discard_wq = alloc_workqueue("xfsdiscard",
			XFS_WQFLAGS(WQ_UNBOUND), 0);
	if (xfs_discard_wq)
		return 0;

	/* Second allocation failed: undo the first one. */
	destroy_workqueue(xfs_alloc_wq);
	return -ENOMEM;
}
2011-04-11 23:06:12 +04:00
STATIC void
2011-04-08 06:45:07 +04:00
xfs_destroy_workqueues ( void )
{
2017-02-08 01:07:58 +03:00
destroy_workqueue ( xfs_discard_wq ) ;
2012-03-22 09:15:07 +04:00
destroy_workqueue ( xfs_alloc_wq ) ;
2011-04-08 06:45:07 +04:00
}
2021-08-06 21:05:37 +03:00
# ifdef CONFIG_HOTPLUG_CPU
static int
xfs_cpu_dead (
unsigned int cpu )
{
2021-08-06 21:05:38 +03:00
struct xfs_mount * mp , * n ;
spin_lock ( & xfs_mount_list_lock ) ;
list_for_each_entry_safe ( mp , n , & xfs_mount_list , m_mount_list ) {
spin_unlock ( & xfs_mount_list_lock ) ;
2021-08-06 21:05:39 +03:00
xfs_inodegc_cpu_dead ( mp , cpu ) ;
2022-07-01 19:13:52 +03:00
xlog_cil_pcp_dead ( mp - > m_log , cpu ) ;
2021-08-06 21:05:38 +03:00
spin_lock ( & xfs_mount_list_lock ) ;
}
spin_unlock ( & xfs_mount_list_lock ) ;
2021-08-06 21:05:37 +03:00
return 0 ;
}
/*
 * Register xfs_cpu_dead() as the teardown callback for the CPUHP_XFS_DEAD
 * hotplug state (no startup callback).
 *
 * Returns the value from cpuhp_setup_state_nocalls(); a negative value is
 * also reported through xfs_alert().
 */
static int __init
xfs_cpu_hotplug_init(void)
{
	int	ret;

	ret = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
			xfs_cpu_dead);
	if (ret >= 0)
		return ret;

	xfs_alert(NULL,
"Failed to initialise CPU hotplug, error %d. XFS is non-functional.",
		ret);
	return ret;
}
/* Remove the CPUHP_XFS_DEAD hotplug state set up by xfs_cpu_hotplug_init(). */
static void
xfs_cpu_hotplug_destroy(void)
{
	cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD);
}
# else /* !CONFIG_HOTPLUG_CPU */
/* Without CPU hotplug support there is nothing to register: report success. */
static inline int
xfs_cpu_hotplug_init(void)
{
	return 0;
}

/* Without CPU hotplug support there is nothing to unregister. */
static inline void
xfs_cpu_hotplug_destroy(void)
{
}
# endif
2005-04-17 02:20:36 +04:00
STATIC int __init
2008-07-18 11:11:46 +04:00
init_xfs_fs ( void )
2005-04-17 02:20:36 +04:00
{
int error ;
2016-03-09 00:15:14 +03:00
xfs_check_ondisk_structs ( ) ;
xfs: test dir/attr hash when loading module
Back in the 6.2-rc1 days, Eric Whitney reported a fstests regression in
ext4 against generic/454. The cause of this test failure was the
unfortunate combination of setting an xattr name containing UTF8 encoded
emoji, an xattr hash function that accepted a char pointer with no
explicit signedness, signed type extension of those chars to an int, and
the 6.2 build tools maintainers deciding to mandate -funsigned-char
across the board. As a result, the ondisk extended attribute structure
written out by 6.1 and 6.2 were not the same.
This discrepancy, in fact, had been noticeable if a filesystem with such
an xattr were moved between any two architectures that don't employ the
same signedness of a raw "char" declaration. The only reason anyone
noticed is that x86 gcc defaults to signed, and no such -funsigned-char
update was made to e2fsprogs, so e2fsck immediately started reporting
data corruption.
After a day and a half of discussing how to handle this use case (xattrs
with bit 7 set anywhere in the name) without breaking existing users,
Linus merged his own patch and didn't tell the maintainer. None of the
ext4 developers realized this until AUTOSEL announced that the commit
had been backported to stable.
In the end, this problem could have been detected much earlier if there
had been any useful tests of hash function(s) in use inside ext4 to make
sure that they always produce the same outputs given the same inputs.
The XFS dirent/xattr name hash takes a uint8_t*, so I don't think it's
vulnerable to this problem. However, let's avoid all this drama by
adding our own self test to check that the da hash produces the same
outputs for a static pile of inputs on various platforms. This enables
us to fix any breakage that may result in a controlled fashion. The
buffer and test data are identical to the patches submitted to xfsprogs.
Link: https://lore.kernel.org/linux-ext4/Y8bpkm3jA3bDm3eL@debian-BULLSEYE-live-builder-AMD64/
Link: https://lore.kernel.org/linux-xfs/ZBUKCRR7xvIqPrpX@destitution/T/#md38272cc684e2c0d61494435ccbb91f022e8dee4
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
2023-03-16 19:31:20 +03:00
error = xfs_dahash_test ( ) ;
if ( error )
return error ;
2008-11-28 06:23:33 +03:00
printk ( KERN_INFO XFS_VERSION_STRING " with "
XFS_BUILD_OPTIONS " enabled \n " ) ;
2005-04-17 02:20:36 +04:00
2008-07-18 11:11:46 +04:00
xfs_dir_startup ( ) ;
2005-04-17 02:20:36 +04:00
2021-08-06 21:05:37 +03:00
error = xfs_cpu_hotplug_init ( ) ;
2008-07-18 11:11:46 +04:00
if ( error )
goto out ;
2021-10-12 21:09:23 +03:00
error = xfs_init_caches ( ) ;
2021-08-06 21:05:37 +03:00
if ( error )
goto out_destroy_hp ;
2011-04-08 06:45:07 +04:00
error = xfs_init_workqueues ( ) ;
2008-07-18 11:11:46 +04:00
if ( error )
2021-10-12 21:09:23 +03:00
goto out_destroy_caches ;
2008-07-18 11:11:46 +04:00
2011-04-08 06:45:07 +04:00
error = xfs_mru_cache_init ( ) ;
if ( error )
goto out_destroy_wq ;
2008-07-18 11:11:46 +04:00
error = xfs_init_procfs ( ) ;
if ( error )
2022-07-19 04:20:37 +03:00
goto out_mru_cache_uninit ;
2008-07-18 11:11:46 +04:00
error = xfs_sysctl_register ( ) ;
if ( error )
goto out_cleanup_procfs ;
2005-04-17 02:20:36 +04:00
2014-07-15 01:41:37 +04:00
xfs_kset = kset_create_and_add ( " xfs " , NULL , fs_kobj ) ;
if ( ! xfs_kset ) {
error = - ENOMEM ;
2015-10-11 21:15:45 +03:00
goto out_sysctl_unregister ;
2014-07-15 01:41:37 +04:00
}
2015-10-11 21:19:45 +03:00
xfsstats . xs_kobj . kobject . kset = xfs_kset ;
xfsstats . xs_stats = alloc_percpu ( struct xfsstats ) ;
if ( ! xfsstats . xs_stats ) {
error = - ENOMEM ;
goto out_kset_unregister ;
}
error = xfs_sysfs_init ( & xfsstats . xs_kobj , & xfs_stats_ktype , NULL ,
2015-10-11 21:15:45 +03:00
" stats " ) ;
if ( error )
2015-10-11 21:19:45 +03:00
goto out_free_stats ;
2015-10-11 21:15:45 +03:00
2014-09-09 05:52:42 +04:00
# ifdef DEBUG
xfs_dbg_kobj . kobject . kset = xfs_kset ;
error = xfs_sysfs_init ( & xfs_dbg_kobj , & xfs_dbg_ktype , NULL , " debug " ) ;
2012-03-13 12:52:37 +04:00
if ( error )
2015-10-11 21:15:45 +03:00
goto out_remove_stats_kobj ;
2014-09-09 05:52:42 +04:00
# endif
error = xfs_qm_init ( ) ;
if ( error )
2015-10-11 21:15:45 +03:00
goto out_remove_dbg_kobj ;
2005-04-17 02:20:36 +04:00
error = register_filesystem ( & xfs_fs_type ) ;
if ( error )
2012-03-13 12:52:37 +04:00
goto out_qm_exit ;
2005-04-17 02:20:36 +04:00
return 0 ;
2012-03-13 12:52:37 +04:00
out_qm_exit :
xfs_qm_exit ( ) ;
2015-10-11 21:15:45 +03:00
out_remove_dbg_kobj :
2014-09-09 05:52:42 +04:00
# ifdef DEBUG
xfs_sysfs_del ( & xfs_dbg_kobj ) ;
2015-10-11 21:15:45 +03:00
out_remove_stats_kobj :
2014-09-09 05:52:42 +04:00
# endif
2015-10-11 21:19:45 +03:00
xfs_sysfs_del ( & xfsstats . xs_kobj ) ;
out_free_stats :
free_percpu ( xfsstats . xs_stats ) ;
2015-10-11 21:15:45 +03:00
out_kset_unregister :
2014-07-15 01:41:37 +04:00
kset_unregister ( xfs_kset ) ;
2008-07-18 11:11:46 +04:00
out_sysctl_unregister :
xfs_sysctl_unregister ( ) ;
out_cleanup_procfs :
xfs_cleanup_procfs ( ) ;
out_mru_cache_uninit :
xfs_mru_cache_uninit ( ) ;
2011-04-08 06:45:07 +04:00
out_destroy_wq :
xfs_destroy_workqueues ( ) ;
2021-10-12 21:09:23 +03:00
out_destroy_caches :
xfs_destroy_caches ( ) ;
2021-08-06 21:05:37 +03:00
out_destroy_hp :
xfs_cpu_hotplug_destroy ( ) ;
2008-07-18 11:11:46 +04:00
out :
2005-04-17 02:20:36 +04:00
return error ;
}
STATIC void __exit
2008-07-18 11:11:46 +04:00
exit_xfs_fs ( void )
2005-04-17 02:20:36 +04:00
{
2012-03-13 12:52:37 +04:00
xfs_qm_exit ( ) ;
2005-04-17 02:20:36 +04:00
unregister_filesystem ( & xfs_fs_type ) ;
2014-09-09 05:52:42 +04:00
# ifdef DEBUG
xfs_sysfs_del ( & xfs_dbg_kobj ) ;
# endif
2015-10-11 21:19:45 +03:00
xfs_sysfs_del ( & xfsstats . xs_kobj ) ;
free_percpu ( xfsstats . xs_stats ) ;
2014-07-15 01:41:37 +04:00
kset_unregister ( xfs_kset ) ;
2008-07-18 11:11:46 +04:00
xfs_sysctl_unregister ( ) ;
xfs_cleanup_procfs ( ) ;
xfs_mru_cache_uninit ( ) ;
2011-04-08 06:45:07 +04:00
xfs_destroy_workqueues ( ) ;
2021-10-12 21:09:23 +03:00
xfs_destroy_caches ( ) ;
2015-11-03 05:06:34 +03:00
xfs_uuid_table_free ( ) ;
2021-08-06 21:05:37 +03:00
xfs_cpu_hotplug_destroy ( ) ;
2005-04-17 02:20:36 +04:00
}
/*
 * Module entry/exit points and module metadata: init_xfs_fs() runs at load
 * time and exit_xfs_fs() at unload time.
 */
module_init ( init_xfs_fs ) ;
module_exit ( exit_xfs_fs ) ;
MODULE_AUTHOR ( " Silicon Graphics, Inc. " ) ;
MODULE_DESCRIPTION ( XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled " ) ;
MODULE_LICENSE ( " GPL " ) ;