2017-10-17 21:37:34 -07:00
/*
* Copyright ( C ) 2017 Oracle . All Rights Reserved .
*
* Author : Darrick J . Wong < darrick . wong @ oracle . com >
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version 2
* of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it would be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write the Free Software Foundation ,
* Inc . , 51 Franklin St , Fifth Floor , Boston , MA 02110 - 1301 , USA .
*/
# include "xfs.h"
# include "xfs_fs.h"
# include "xfs_shared.h"
# include "xfs_format.h"
# include "xfs_trans_resv.h"
# include "xfs_mount.h"
# include "xfs_defer.h"
# include "xfs_btree.h"
# include "xfs_bit.h"
# include "xfs_log_format.h"
# include "xfs_trans.h"
# include "xfs_sb.h"
# include "xfs_inode.h"
2017-10-17 21:37:42 -07:00
# include "xfs_icache.h"
# include "xfs_itable.h"
2017-10-17 21:37:34 -07:00
# include "xfs_alloc.h"
# include "xfs_alloc_btree.h"
# include "xfs_bmap.h"
# include "xfs_bmap_btree.h"
# include "xfs_ialloc.h"
# include "xfs_ialloc_btree.h"
# include "xfs_refcount.h"
# include "xfs_refcount_btree.h"
# include "xfs_rmap.h"
# include "xfs_rmap_btree.h"
# include "scrub/xfs_scrub.h"
# include "scrub/scrub.h"
2017-10-17 21:37:36 -07:00
# include "scrub/common.h"
2017-10-17 21:37:34 -07:00
# include "scrub/trace.h"
2017-10-17 21:37:38 -07:00
# include "scrub/btree.h"
2017-10-17 21:37:34 -07:00
2017-10-17 21:37:35 -07:00
/*
* Online Scrub and Repair
*
* Traditionally , XFS ( the kernel driver ) did not know how to check or
* repair on - disk data structures . That task was left to the xfs_check
* and xfs_repair tools , both of which require taking the filesystem
* offline for a thorough but time consuming examination . Online
* scrub & repair , on the other hand , enables us to check the metadata
* for obvious errors while carefully stepping around the filesystem ' s
* ongoing operations , locking rules , etc .
*
* Given that most XFS metadata consist of records stored in a btree ,
* most of the checking functions iterate the btree blocks themselves
* looking for irregularities . When a record block is encountered , each
* record can be checked for obviously bad values . Record values can
* also be cross - referenced against other btrees to look for potential
* misunderstandings between pieces of metadata .
*
* It is expected that the checkers responsible for per - AG metadata
* structures will lock the AG headers ( AGI , AGF , AGFL ) , iterate the
* metadata structure , and perform any relevant cross - referencing before
* unlocking the AG and returning the results to userspace . These
* scrubbers must not keep an AG locked for too long to avoid tying up
* the block and inode allocators .
*
* Block maps and b - trees rooted in an inode present a special challenge
* because they can involve extents from any AG . The general scrubber
* structure of lock - > check - > xref - > unlock still holds , but AG
* locking order rules / must / be obeyed to avoid deadlocks . The
* ordering rule , of course , is that we must lock in increasing AG
* order . Helper functions are provided to track which AG headers we ' ve
* already locked . If we detect an imminent locking order violation , we
* can signal a potential deadlock , in which case the scrubber can jump
* out to the top level , lock all the AGs in order , and retry the scrub .
*
* For file data ( directories , extended attributes , symlinks ) scrub , we
* can simply lock the inode and walk the data . For btree data
* ( directories and attributes ) we follow the same btree - scrubbing
* strategy outlined previously to check the records .
*
* We use a bit of trickery with transactions to avoid buffer deadlocks
* if there is a cycle in the metadata . The basic problem is that
* travelling down a btree involves locking the current buffer at each
* tree level . If a pointer should somehow point back to a buffer that
* we ' ve already examined , we will deadlock due to the second buffer
* locking attempt . Note however that grabbing a buffer in transaction
* context links the locked buffer to the transaction . If we try to
* re - grab the buffer in the context of the same transaction , we avoid
* the second lock attempt and continue . Between the verifier and the
* scrubber , something will notice that something is amiss and report
* the corruption . Therefore , each scrubber will allocate an empty
* transaction , attach buffers to it , and cancel the transaction at the
* end of the scrub run . Cancelling a non - dirty transaction simply
* unlocks the buffers .
*
* Scrub communicates its results to userspace in two ways .  The first
* is the error code ( errno ) , which reports operational errors in
* performing the scrub .  The second is a set of output flags in the
* scrub context .  If the data structure itself is corrupt , the
* CORRUPT flag will be set .  If the metadata is correct but otherwise
* suboptimal , the PREEN flag will be set .
2018-01-16 18:52:14 -08:00
*
* We perform secondary validation of filesystem metadata by
* cross - referencing every record with all other available metadata .
* For example , for block mapping extents , we verify that there are no
* records in the free space and inode btrees corresponding to that
* space extent and that there is a corresponding entry in the reverse
* mapping btree . Inconsistent metadata is noted by setting the
* XCORRUPT flag ; btree query function errors are noted by setting the
* XFAIL flag and deleting the cursor to prevent further attempts to
* cross - reference with a defective btree .
2017-10-17 21:37:35 -07:00
*/
2017-10-17 21:37:36 -07:00
/*
* Scrub probe - - userspace uses this to probe if we ' re willing to scrub
* or repair a given mountpoint . This will be used by xfs_scrub to
* probe the kernel ' s abilities to scrub ( and repair ) the metadata . We
* do this by validating the ioctl inputs from userspace , preparing the
* filesystem for a scrub ( or a repair ) operation , and immediately
* returning to userspace . Userspace can use the returned errno and
* structure state to decide ( in broad terms ) if scrub / repair are
* supported by the running kernel .
*/
2017-11-06 11:53:58 -08:00
static int
xfs_scrub_probe(
	struct xfs_scrub_context	*sc)
{
	int				error = 0;

	/*
	 * There is nothing to check for a probe.  The only possible
	 * failure is whatever the termination check stores in @error
	 * when it tells us to stop; otherwise report success.
	 */
	return xfs_scrub_should_terminate(sc, &error) ? error : 0;
}
2017-10-17 21:37:35 -07:00
/* Scrub setup and teardown */
/* Free all the resources and finish the transactions. */
STATIC int
xfs_scrub_teardown (
struct xfs_scrub_context * sc ,
2017-10-17 21:37:42 -07:00
struct xfs_inode * ip_in ,
2017-10-17 21:37:35 -07:00
int error )
{
2017-10-17 21:37:38 -07:00
xfs_scrub_ag_free ( sc , & sc - > sa ) ;
2017-10-17 21:37:35 -07:00
if ( sc - > tp ) {
xfs_trans_cancel ( sc - > tp ) ;
sc - > tp = NULL ;
}
2017-10-17 21:37:42 -07:00
if ( sc - > ip ) {
2018-01-16 18:53:57 -08:00
if ( sc - > ilock_flags )
xfs_iunlock ( sc - > ip , sc - > ilock_flags ) ;
2017-10-17 21:37:42 -07:00
if ( sc - > ip ! = ip_in & &
! xfs_internal_inum ( sc - > mp , sc - > ip - > i_ino ) )
iput ( VFS_I ( sc - > ip ) ) ;
sc - > ip = NULL ;
}
2017-10-17 21:37:45 -07:00
if ( sc - > buf ) {
kmem_free ( sc - > buf ) ;
sc - > buf = NULL ;
}
2017-10-17 21:37:35 -07:00
return error ;
}
/* Scrubbing dispatch. */
static const struct xfs_scrub_meta_ops meta_scrub_ops [ ] = {
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_PROBE ] = { /* ioctl presence test */
2018-01-08 10:41:34 -08:00
. type = ST_NONE ,
2017-10-17 21:37:36 -07:00
. setup = xfs_scrub_setup_fs ,
. scrub = xfs_scrub_probe ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_SB ] = { /* superblock */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
. setup = xfs_scrub_setup_fs ,
2017-10-17 21:37:38 -07:00
. scrub = xfs_scrub_superblock ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_AGF ] = { /* agf */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
. setup = xfs_scrub_setup_fs ,
2017-10-17 21:37:39 -07:00
. scrub = xfs_scrub_agf ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_AGFL ] = { /* agfl */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
. setup = xfs_scrub_setup_fs ,
2017-10-17 21:37:39 -07:00
. scrub = xfs_scrub_agfl ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_AGI ] = { /* agi */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
. setup = xfs_scrub_setup_fs ,
2017-10-17 21:37:39 -07:00
. scrub = xfs_scrub_agi ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_BNOBT ] = { /* bnobt */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
2017-10-17 21:37:40 -07:00
. setup = xfs_scrub_setup_ag_allocbt ,
. scrub = xfs_scrub_bnobt ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_CNTBT ] = { /* cntbt */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
2017-10-17 21:37:40 -07:00
. setup = xfs_scrub_setup_ag_allocbt ,
. scrub = xfs_scrub_cntbt ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_INOBT ] = { /* inobt */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
2017-10-17 21:37:40 -07:00
. setup = xfs_scrub_setup_ag_iallocbt ,
. scrub = xfs_scrub_inobt ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_FINOBT ] = { /* finobt */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
2017-10-17 21:37:40 -07:00
. setup = xfs_scrub_setup_ag_iallocbt ,
. scrub = xfs_scrub_finobt ,
. has = xfs_sb_version_hasfinobt ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_RMAPBT ] = { /* rmapbt */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
2017-10-17 21:37:41 -07:00
. setup = xfs_scrub_setup_ag_rmapbt ,
. scrub = xfs_scrub_rmapbt ,
. has = xfs_sb_version_hasrmapbt ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_REFCNTBT ] = { /* refcountbt */
2018-01-08 10:41:34 -08:00
. type = ST_PERAG ,
2017-10-17 21:37:41 -07:00
. setup = xfs_scrub_setup_ag_refcountbt ,
. scrub = xfs_scrub_refcountbt ,
. has = xfs_sb_version_hasreflink ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_INODE ] = { /* inode record */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:42 -07:00
. setup = xfs_scrub_setup_inode ,
. scrub = xfs_scrub_inode ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_BMBTD ] = { /* inode data fork */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:43 -07:00
. setup = xfs_scrub_setup_inode_bmap ,
. scrub = xfs_scrub_bmap_data ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_BMBTA ] = { /* inode attr fork */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:43 -07:00
. setup = xfs_scrub_setup_inode_bmap ,
. scrub = xfs_scrub_bmap_attr ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_BMBTC ] = { /* inode CoW fork */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:43 -07:00
. setup = xfs_scrub_setup_inode_bmap ,
. scrub = xfs_scrub_bmap_cow ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_DIR ] = { /* directory */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:44 -07:00
. setup = xfs_scrub_setup_directory ,
. scrub = xfs_scrub_directory ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_XATTR ] = { /* extended attributes */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:45 -07:00
. setup = xfs_scrub_setup_xattr ,
. scrub = xfs_scrub_xattr ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_SYMLINK ] = { /* symbolic link */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:45 -07:00
. setup = xfs_scrub_setup_symlink ,
. scrub = xfs_scrub_symlink ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_PARENT ] = { /* parent pointers */
2018-01-08 10:41:34 -08:00
. type = ST_INODE ,
2017-10-17 21:37:46 -07:00
. setup = xfs_scrub_setup_parent ,
. scrub = xfs_scrub_parent ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_RTBITMAP ] = { /* realtime bitmap */
2018-01-08 10:41:34 -08:00
. type = ST_FS ,
2017-10-17 21:37:46 -07:00
. setup = xfs_scrub_setup_rt ,
. scrub = xfs_scrub_rtbitmap ,
. has = xfs_sb_version_hasrealtime ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_RTSUM ] = { /* realtime summary */
2018-01-08 10:41:34 -08:00
. type = ST_FS ,
2017-10-17 21:37:46 -07:00
. setup = xfs_scrub_setup_rt ,
. scrub = xfs_scrub_rtsummary ,
. has = xfs_sb_version_hasrealtime ,
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_UQUOTA ] = { /* user quota */
2018-01-08 10:41:34 -08:00
. type = ST_FS ,
. setup = xfs_scrub_setup_quota ,
. scrub = xfs_scrub_quota ,
2017-10-17 21:37:47 -07:00
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_GQUOTA ] = { /* group quota */
2018-01-08 10:41:34 -08:00
. type = ST_FS ,
. setup = xfs_scrub_setup_quota ,
. scrub = xfs_scrub_quota ,
2017-10-17 21:37:47 -07:00
} ,
2018-01-08 10:41:33 -08:00
[ XFS_SCRUB_TYPE_PQUOTA ] = { /* project quota */
2018-01-08 10:41:34 -08:00
. type = ST_FS ,
. setup = xfs_scrub_setup_quota ,
. scrub = xfs_scrub_quota ,
2017-10-17 21:37:47 -07:00
} ,
2017-10-17 21:37:35 -07:00
} ;
/* This isn't a stable feature, warn once per day. */
static inline void
xfs_scrub_experimental_warning(
	struct xfs_mount		*mp)
{
	/* One message per 86400 * HZ interval, shared by all callers. */
	static struct ratelimit_state	scrub_warning = RATELIMIT_STATE_INIT(
			" xfs_scrub_warning ", 86400 * HZ, 1);

	ratelimit_set_flags(&scrub_warning, RATELIMIT_MSG_ON_RELEASE);

	/* Stay quiet while the rate limiter is suppressing us. */
	if (!__ratelimit(&scrub_warning))
		return;

	xfs_alert(mp,
" EXPERIMENTAL online scrub feature in use. Use at your own risk! ");
}
2018-01-08 10:41:34 -08:00
static int
xfs_scrub_validate_inputs (
struct xfs_mount * mp ,
2017-10-17 21:37:34 -07:00
struct xfs_scrub_metadata * sm )
{
2018-01-08 10:41:34 -08:00
int error ;
2017-10-17 21:37:35 -07:00
const struct xfs_scrub_meta_ops * ops ;
error = - EINVAL ;
2018-01-08 10:41:34 -08:00
/* Check our inputs. */
2017-10-17 21:37:35 -07:00
sm - > sm_flags & = ~ XFS_SCRUB_FLAGS_OUT ;
if ( sm - > sm_flags & ~ XFS_SCRUB_FLAGS_IN )
goto out ;
2018-01-08 10:41:34 -08:00
/* sm_reserved[] must be zero */
2017-10-17 21:37:35 -07:00
if ( memchr_inv ( sm - > sm_reserved , 0 , sizeof ( sm - > sm_reserved ) ) )
goto out ;
error = - ENOENT ;
2018-01-08 10:41:34 -08:00
/* Do we know about this type of metadata? */
2017-10-17 21:37:35 -07:00
if ( sm - > sm_type > = XFS_SCRUB_TYPE_NR )
goto out ;
ops = & meta_scrub_ops [ sm - > sm_type ] ;
2018-01-08 10:41:33 -08:00
if ( ops - > setup = = NULL | | ops - > scrub = = NULL )
2017-10-17 21:37:35 -07:00
goto out ;
2018-01-08 10:41:34 -08:00
/* Does this fs even support this type of metadata? */
if ( ops - > has & & ! ops - > has ( & mp - > m_sb ) )
goto out ;
2017-10-17 21:37:35 -07:00
2018-01-08 10:41:34 -08:00
error = - EINVAL ;
/* restricting fields must be appropriate for type */
switch ( ops - > type ) {
case ST_NONE :
case ST_FS :
if ( sm - > sm_ino | | sm - > sm_gen | | sm - > sm_agno )
goto out ;
break ;
case ST_PERAG :
if ( sm - > sm_ino | | sm - > sm_gen | |
sm - > sm_agno > = mp - > m_sb . sb_agcount )
goto out ;
break ;
case ST_INODE :
if ( sm - > sm_agno | | ( sm - > sm_gen & & ! sm - > sm_ino ) )
goto out ;
break ;
default :
goto out ;
}
2018-01-08 10:41:34 -08:00
error = - EOPNOTSUPP ;
2017-10-17 21:37:35 -07:00
/*
* We won ' t scrub any filesystem that doesn ' t have the ability
* to record unwritten extents . The option was made default in
* 2003 , removed from mkfs in 2007 , and cannot be disabled in
* v5 , so if we find a filesystem without this flag it ' s either
* really old or totally unsupported . Avoid it either way .
* We also don ' t support v1 - v3 filesystems , which aren ' t
* mountable .
*/
if ( ! xfs_sb_version_hasextflgbit ( & mp - > m_sb ) )
goto out ;
/* We don't know how to repair anything yet. */
if ( sm - > sm_flags & XFS_SCRUB_IFLAG_REPAIR )
goto out ;
2018-01-08 10:41:34 -08:00
error = 0 ;
out :
return error ;
}
/* Dispatch metadata scrubbing. */
int
xfs_scrub_metadata (
struct xfs_inode * ip ,
struct xfs_scrub_metadata * sm )
{
struct xfs_scrub_context sc ;
struct xfs_mount * mp = ip - > i_mount ;
bool try_harder = false ;
int error = 0 ;
BUILD_BUG_ON ( sizeof ( meta_scrub_ops ) ! =
( sizeof ( struct xfs_scrub_meta_ops ) * XFS_SCRUB_TYPE_NR ) ) ;
trace_xfs_scrub_start ( ip , sm , error ) ;
/* Forbidden if we are shut down or mounted norecovery. */
error = - ESHUTDOWN ;
if ( XFS_FORCED_SHUTDOWN ( mp ) )
goto out ;
error = - ENOTRECOVERABLE ;
if ( mp - > m_flags & XFS_MOUNT_NORECOVERY )
goto out ;
error = xfs_scrub_validate_inputs ( mp , sm ) ;
if ( error )
goto out ;
2017-10-17 21:37:35 -07:00
xfs_scrub_experimental_warning ( mp ) ;
retry_op :
/* Set up for the operation. */
memset ( & sc , 0 , sizeof ( sc ) ) ;
sc . mp = ip - > i_mount ;
sc . sm = sm ;
2018-01-08 10:41:34 -08:00
sc . ops = & meta_scrub_ops [ sm - > sm_type ] ;
2017-10-17 21:37:35 -07:00
sc . try_harder = try_harder ;
2017-10-17 21:37:38 -07:00
sc . sa . agno = NULLAGNUMBER ;
2017-10-17 21:37:35 -07:00
error = sc . ops - > setup ( & sc , ip ) ;
if ( error )
goto out_teardown ;
/* Scrub for errors. */
error = sc . ops - > scrub ( & sc ) ;
if ( ! try_harder & & error = = - EDEADLOCK ) {
/*
* Scrubbers return - EDEADLOCK to mean ' try harder ' .
* Tear down everything we hold , then set up again with
* preparation for worst - case scenarios .
*/
2017-10-17 21:37:42 -07:00
error = xfs_scrub_teardown ( & sc , ip , 0 ) ;
2017-10-17 21:37:35 -07:00
if ( error )
goto out ;
try_harder = true ;
goto retry_op ;
} else if ( error )
goto out_teardown ;
if ( sc . sm - > sm_flags & ( XFS_SCRUB_OFLAG_CORRUPT |
XFS_SCRUB_OFLAG_XCORRUPT ) )
xfs_alert_ratelimited ( mp , " Corruption detected during scrub. " ) ;
out_teardown :
2017-10-17 21:37:42 -07:00
error = xfs_scrub_teardown ( & sc , ip , error ) ;
2017-10-17 21:37:35 -07:00
out :
trace_xfs_scrub_done ( ip , sm , error ) ;
if ( error = = - EFSCORRUPTED | | error = = - EFSBADCRC ) {
sm - > sm_flags | = XFS_SCRUB_OFLAG_CORRUPT ;
error = 0 ;
}
return error ;
2017-10-17 21:37:34 -07:00
}