2005-12-16 01:31:24 +03:00
/* -*- mode: c; c-basic-offset: 8; -*-
* vim : noexpandtab sw = 8 ts = 8 sts = 0 :
*
* super . c
*
* load / unload driver , mount / dismount volumes
*
* Copyright ( C ) 2002 , 2004 Oracle . All rights reserved .
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation ; either
* version 2 of the License , or ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
* General Public License for more details .
*
* You should have received a copy of the GNU General Public
* License along with this program ; if not , write to the
* Free Software Foundation , Inc . , 59 Temple Place - Suite 330 ,
* Boston , MA 02111 - 1307 , USA .
*/
# include <linux/module.h>
# include <linux/fs.h>
# include <linux/types.h>
# include <linux/slab.h>
# include <linux/highmem.h>
# include <linux/init.h>
# include <linux/random.h>
# include <linux/statfs.h>
# include <linux/moduleparam.h>
# include <linux/blkdev.h>
# include <linux/socket.h>
# include <linux/inet.h>
# include <linux/parser.h>
# include <linux/crc32.h>
# include <linux/debugfs.h>
2007-09-07 00:34:16 +04:00
# include <linux/mount.h>
2008-01-30 03:59:56 +03:00
# include <linux/seq_file.h>
2008-08-21 22:13:17 +04:00
# include <linux/quotaops.h>
2011-05-26 20:02:08 +04:00
# include <linux/cleancache.h>
2005-12-16 01:31:24 +03:00
2011-02-21 06:13:14 +03:00
# define CREATE_TRACE_POINTS
# include "ocfs2_trace.h"
2005-12-16 01:31:24 +03:00
# include <cluster/masklog.h>
# include "ocfs2.h"
/* this should be the only file to include a version 1 header */
# include "ocfs1_fs_compat.h"
# include "alloc.h"
2011-06-23 01:23:38 +04:00
# include "aops.h"
2008-12-12 02:04:14 +03:00
# include "blockcheck.h"
2005-12-16 01:31:24 +03:00
# include "dlmglue.h"
# include "export.h"
# include "extent_map.h"
# include "heartbeat.h"
# include "inode.h"
# include "journal.h"
# include "localalloc.h"
# include "namei.h"
# include "slot_map.h"
# include "super.h"
# include "sysfile.h"
# include "uptodate.h"
2008-08-18 13:11:00 +04:00
# include "xattr.h"
2008-08-25 21:56:50 +04:00
# include "quota.h"
2009-08-24 07:13:37 +04:00
# include "refcounttree.h"
2010-01-25 09:11:06 +03:00
# include "suballoc.h"
2005-12-16 01:31:24 +03:00
# include "buffer_head_io.h"
2014-06-05 03:06:06 +04:00
/* Slab cache from which ocfs2_alloc_inode() allocates and
 * ocfs2_i_callback() frees the per-inode ocfs2 objects. */
static struct kmem_cache * ocfs2_inode_cachep ;
2008-08-25 21:56:50 +04:00
/* Quota-related slab caches. They are not static, so presumably they are
 * used from other ocfs2 source files -- TODO confirm. */
struct kmem_cache * ocfs2_dquot_cachep ;
struct kmem_cache * ocfs2_qf_chunk_cachep ;
2005-12-16 01:31:24 +03:00
2011-03-31 05:57:33 +04:00
/*
 * OCFS2 needs to schedule several different types of work which
 * require cluster locking, disk I/O, recovery waits, etc. Since these
 * types of work tend to be heavy we avoid using the kernel events
 * workqueue and schedule on our own.
 */
struct workqueue_struct * ocfs2_wq = NULL ;
2014-06-05 03:06:06 +04:00
/* Dentry of the module's debugfs directory; it is not assigned in the
 * visible part of this file -- presumably set up at module init. */
static struct dentry * ocfs2_debugfs_root ;
2005-12-16 01:31:24 +03:00
/* Module metadata. */
MODULE_AUTHOR ( " Oracle " ) ;
MODULE_LICENSE ( " GPL " ) ;
2014-01-22 03:48:20 +04:00
MODULE_DESCRIPTION ( " OCFS2 cluster file system " ) ;
2005-12-16 01:31:24 +03:00
2007-09-07 20:16:10 +04:00
/*
 * Result of parsing a mount/remount option string.
 *
 * ocfs2_remount() fills one of these via ocfs2_parse_options() and, on
 * success, copies mount_opt, atime_quantum, slot and (when non-zero)
 * commit_interval into the ocfs2_super.
 */
struct mount_options
{
2007-11-08 01:40:36 +03:00
/* Stored in osb->osb_commit_interval on remount; 0 leaves the old value. */
unsigned long commit_interval ;
2007-09-07 20:16:10 +04:00
/* Bitmask of OCFS2_MOUNT_* flags. */
unsigned long mount_opt ;
/* Stored in osb->s_atime_quantum on remount. */
unsigned int atime_quantum ;
/* Stored in osb->preferred_slot on remount. */
signed short slot ;
2010-04-06 05:17:13 +04:00
/* Presumably the value of the "localalloc=" option -- TODO confirm. */
int localalloc_opt ;
2009-12-08 00:10:48 +03:00
/* Presumably the value of the "resv_level=" option -- TODO confirm. */
unsigned int resv_level ;
2010-04-06 05:17:16 +04:00
/* Presumably the value of the "dir_resv_level=" option -- TODO confirm. */
int dir_resv_level ;
2008-02-02 02:08:23 +03:00
/* Buffer of OCFS2_STACK_LABEL_LEN + 1 bytes; presumably holds the
 * NUL-terminated "cluster_stack=" label -- TODO confirm. */
char cluster_stack [ OCFS2_STACK_LABEL_LEN + 1 ] ;
2007-09-07 20:16:10 +04:00
} ;
2005-12-16 01:31:24 +03:00
/*
 * Forward declarations of file-local helpers, so that the operation
 * tables and functions below can reference them before their definitions.
 */
static int ocfs2_parse_options ( struct super_block * sb , char * options ,
2007-09-07 20:16:10 +04:00
struct mount_options * mopt ,
2007-06-19 04:00:24 +04:00
int is_remount ) ;
2009-10-15 16:54:04 +04:00
static int ocfs2_check_set_options ( struct super_block * sb ,
struct mount_options * options ) ;
2011-12-09 06:32:45 +04:00
static int ocfs2_show_options ( struct seq_file * s , struct dentry * root ) ;
2005-12-16 01:31:24 +03:00
static void ocfs2_put_super ( struct super_block * sb ) ;
static int ocfs2_mount_volume ( struct super_block * sb ) ;
static int ocfs2_remount ( struct super_block * sb , int * flags , char * data ) ;
static void ocfs2_dismount_volume ( struct super_block * sb , int mnt_err ) ;
static int ocfs2_initialize_mem_caches ( void ) ;
static void ocfs2_free_mem_caches ( void ) ;
static void ocfs2_delete_osb ( struct ocfs2_super * osb ) ;
2006-06-23 13:02:58 +04:00
static int ocfs2_statfs ( struct dentry * dentry , struct kstatfs * buf ) ;
2005-12-16 01:31:24 +03:00
static int ocfs2_sync_fs ( struct super_block * sb , int wait ) ;
static int ocfs2_init_global_system_inodes ( struct ocfs2_super * osb ) ;
static int ocfs2_init_local_system_inodes ( struct ocfs2_super * osb ) ;
2007-09-26 22:10:04 +04:00
static void ocfs2_release_system_inodes ( struct ocfs2_super * osb ) ;
2005-12-16 01:31:24 +03:00
static int ocfs2_check_volume ( struct ocfs2_super * osb ) ;
static int ocfs2_verify_volume ( struct ocfs2_dinode * di ,
struct buffer_head * bh ,
2009-01-07 01:57:08 +03:00
u32 sectsize ,
struct ocfs2_blockcheck_stats * stats ) ;
2005-12-16 01:31:24 +03:00
static int ocfs2_initialize_super ( struct super_block * sb ,
struct buffer_head * bh ,
2009-01-07 01:57:08 +03:00
int sector_size ,
struct ocfs2_blockcheck_stats * stats ) ;
2005-12-16 01:31:24 +03:00
static int ocfs2_get_sector ( struct super_block * sb ,
struct buffer_head * * bh ,
int block ,
int sect_size ) ;
static struct inode * ocfs2_alloc_inode ( struct super_block * sb ) ;
static void ocfs2_destroy_inode ( struct inode * inode ) ;
2008-08-21 22:13:17 +04:00
static int ocfs2_susp_quotas ( struct ocfs2_super * osb , int unsuspend ) ;
static int ocfs2_enable_quotas ( struct ocfs2_super * osb ) ;
static void ocfs2_disable_quotas ( struct ocfs2_super * osb ) ;
2005-12-16 01:31:24 +03:00
2014-09-29 17:02:51 +04:00
static struct dquot * * ocfs2_get_dquots ( struct inode * inode )
{
return OCFS2_I ( inode ) - > i_dquot ;
}
2007-02-12 11:55:41 +03:00
/*
 * Super-block operations table for ocfs2.
 *
 * Handlers that have no forward declaration in this file
 * (ocfs2_drop_inode, ocfs2_evict_inode, ocfs2_quota_read,
 * ocfs2_quota_write) are presumably declared in the included project
 * headers -- TODO confirm.
 */
static const struct super_operations ocfs2_sops = {
2005-12-16 01:31:24 +03:00
. statfs = ocfs2_statfs ,
. alloc_inode = ocfs2_alloc_inode ,
. destroy_inode = ocfs2_destroy_inode ,
. drop_inode = ocfs2_drop_inode ,
2010-06-09 05:28:10 +04:00
. evict_inode = ocfs2_evict_inode ,
2005-12-16 01:31:24 +03:00
. sync_fs = ocfs2_sync_fs ,
. put_super = ocfs2_put_super ,
. remount_fs = ocfs2_remount ,
2007-09-07 00:34:16 +04:00
. show_options = ocfs2_show_options ,
2008-08-25 21:56:50 +04:00
. quota_read = ocfs2_quota_read ,
. quota_write = ocfs2_quota_write ,
2014-09-29 17:02:51 +04:00
. get_dquots = ocfs2_get_dquots ,
2005-12-16 01:31:24 +03:00
} ;
/*
 * Identifiers for the mount options understood by this file. Each value
 * is paired with its textual pattern in the `tokens` table; Opt_err is
 * the id used by that table's terminating { Opt_err, NULL } entry.
 */
enum {
Opt_barrier ,
Opt_err_panic ,
Opt_err_ro ,
Opt_intr ,
Opt_nointr ,
Opt_hb_none ,
Opt_hb_local ,
2010-10-08 02:23:50 +04:00
Opt_hb_global ,
2005-12-16 01:31:24 +03:00
Opt_data_ordered ,
Opt_data_writeback ,
2006-11-15 10:48:42 +03:00
Opt_atime_quantum ,
2007-06-19 04:00:24 +04:00
Opt_slot ,
2007-11-08 01:40:36 +03:00
Opt_commit ,
2007-12-21 01:58:11 +03:00
Opt_localalloc ,
2007-12-21 03:49:04 +03:00
Opt_localflocks ,
2008-02-02 02:08:23 +03:00
Opt_stack ,
2008-08-18 13:11:00 +04:00
Opt_user_xattr ,
Opt_nouser_xattr ,
2008-09-04 07:03:40 +04:00
Opt_inode64 ,
2008-11-14 06:17:52 +03:00
Opt_acl ,
Opt_noacl ,
2008-08-21 22:13:17 +04:00
Opt_usrquota ,
Opt_grpquota ,
2010-10-11 12:46:39 +04:00
Opt_coherency_buffered ,
Opt_coherency_full ,
2009-12-08 00:10:48 +03:00
Opt_resv_level ,
2010-04-06 05:17:16 +04:00
Opt_dir_resv_level ,
2015-02-11 01:09:04 +03:00
Opt_journal_async_commit ,
2015-09-05 01:44:11 +03:00
Opt_err_cont ,
2005-12-16 01:31:24 +03:00
Opt_err ,
} ;
2008-10-13 13:46:57 +04:00
/*
 * Mount-option pattern table: maps each Opt_* id to the option text it
 * stands for. Patterns containing %u / %d / %s take an argument. The
 * table ends with the { Opt_err, NULL } entry. Presumably consumed by
 * ocfs2_parse_options(), which is not visible in this part of the file.
 */
static const match_table_t tokens = {
2005-12-16 01:31:24 +03:00
{ Opt_barrier , " barrier=%u " } ,
{ Opt_err_panic , " errors=panic " } ,
{ Opt_err_ro , " errors=remount-ro " } ,
{ Opt_intr , " intr " } ,
{ Opt_nointr , " nointr " } ,
{ Opt_hb_none , OCFS2_HB_NONE } ,
{ Opt_hb_local , OCFS2_HB_LOCAL } ,
2010-10-08 02:23:50 +04:00
{ Opt_hb_global , OCFS2_HB_GLOBAL } ,
2005-12-16 01:31:24 +03:00
{ Opt_data_ordered , " data=ordered " } ,
{ Opt_data_writeback , " data=writeback " } ,
2006-11-15 10:48:42 +03:00
{ Opt_atime_quantum , " atime_quantum=%u " } ,
2007-06-19 04:00:24 +04:00
{ Opt_slot , " preferred_slot=%u " } ,
2007-11-08 01:40:36 +03:00
{ Opt_commit , " commit=%u " } ,
2007-12-21 01:58:11 +03:00
{ Opt_localalloc , " localalloc=%d " } ,
2007-12-21 03:49:04 +03:00
{ Opt_localflocks , " localflocks " } ,
2008-02-02 02:08:23 +03:00
{ Opt_stack , " cluster_stack=%s " } ,
2008-08-18 13:11:00 +04:00
{ Opt_user_xattr , " user_xattr " } ,
{ Opt_nouser_xattr , " nouser_xattr " } ,
2008-09-04 07:03:40 +04:00
{ Opt_inode64 , " inode64 " } ,
2008-11-14 06:17:52 +03:00
{ Opt_acl , " acl " } ,
{ Opt_noacl , " noacl " } ,
2008-08-21 22:13:17 +04:00
{ Opt_usrquota , " usrquota " } ,
{ Opt_grpquota , " grpquota " } ,
2010-10-11 12:46:39 +04:00
{ Opt_coherency_buffered , " coherency=buffered " } ,
{ Opt_coherency_full , " coherency=full " } ,
2009-12-08 00:10:48 +03:00
{ Opt_resv_level , " resv_level=%u " } ,
2010-04-06 05:17:16 +04:00
{ Opt_dir_resv_level , " dir_resv_level=%u " } ,
2015-02-11 01:09:04 +03:00
{ Opt_journal_async_commit , " journal_async_commit " } ,
2015-09-05 01:44:11 +03:00
{ Opt_err_cont , " errors=continue " } ,
2005-12-16 01:31:24 +03:00
{ Opt_err , NULL }
} ;
2008-12-18 01:17:43 +03:00
# ifdef CONFIG_DEBUG_FS
static int ocfs2_osb_dump ( struct ocfs2_super * osb , char * buf , int len )
{
struct ocfs2_cluster_connection * cconn = osb - > cconn ;
struct ocfs2_recovery_map * rm = osb - > recovery_map ;
2009-06-22 22:40:07 +04:00
struct ocfs2_orphan_scan * os = & osb - > osb_orphan_scan ;
int i , out = 0 ;
2016-03-16 00:53:01 +03:00
unsigned long flags ;
2008-12-18 01:17:43 +03:00
out + = snprintf ( buf + out , len - out ,
" %10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s \n " ,
" Device " , osb - > dev_str , osb - > uuid_str ,
osb - > fs_generation , osb - > vol_label ) ;
out + = snprintf ( buf + out , len - out ,
" %10s => State: %d Flags: 0x%lX \n " , " Volume " ,
atomic_read ( & osb - > vol_state ) , osb - > osb_flags ) ;
out + = snprintf ( buf + out , len - out ,
" %10s => Block: %lu Cluster: %d \n " , " Sizes " ,
osb - > sb - > s_blocksize , osb - > s_clustersize ) ;
out + = snprintf ( buf + out , len - out ,
" %10s => Compat: 0x%X Incompat: 0x%X "
" ROcompat: 0x%X \n " ,
" Features " , osb - > s_feature_compat ,
osb - > s_feature_incompat , osb - > s_feature_ro_compat ) ;
out + = snprintf ( buf + out , len - out ,
" %10s => Opts: 0x%lX AtimeQuanta: %u \n " , " Mount " ,
osb - > s_mount_opt , osb - > s_atime_quantum ) ;
2009-06-20 01:45:55 +04:00
if ( cconn ) {
out + = snprintf ( buf + out , len - out ,
" %10s => Stack: %s Name: %*s "
" Version: %d.%d \n " , " Cluster " ,
( * osb - > osb_cluster_stack = = ' \0 ' ?
" o2cb " : osb - > osb_cluster_stack ) ,
cconn - > cc_namelen , cconn - > cc_name ,
cconn - > cc_version . pv_major ,
cconn - > cc_version . pv_minor ) ;
}
2008-12-18 01:17:43 +03:00
2016-03-16 00:53:01 +03:00
spin_lock_irqsave ( & osb - > dc_task_lock , flags ) ;
2008-12-18 01:17:43 +03:00
out + = snprintf ( buf + out , len - out ,
" %10s => Pid: %d Count: %lu WakeSeq: %lu "
" WorkSeq: %lu \n " , " DownCnvt " ,
2009-06-20 01:45:55 +04:00
( osb - > dc_task ? task_pid_nr ( osb - > dc_task ) : - 1 ) ,
osb - > blocked_lock_count , osb - > dc_wake_sequence ,
osb - > dc_work_sequence ) ;
2016-03-16 00:53:01 +03:00
spin_unlock_irqrestore ( & osb - > dc_task_lock , flags ) ;
2008-12-18 01:17:43 +03:00
spin_lock ( & osb - > osb_lock ) ;
out + = snprintf ( buf + out , len - out , " %10s => Pid: %d Nodes: " ,
" Recovery " ,
( osb - > recovery_thread_task ?
task_pid_nr ( osb - > recovery_thread_task ) : - 1 ) ) ;
if ( rm - > rm_used = = 0 )
out + = snprintf ( buf + out , len - out , " None \n " ) ;
else {
for ( i = 0 ; i < rm - > rm_used ; i + + )
out + = snprintf ( buf + out , len - out , " %d " ,
rm - > rm_entries [ i ] ) ;
out + = snprintf ( buf + out , len - out , " \n " ) ;
}
spin_unlock ( & osb - > osb_lock ) ;
out + = snprintf ( buf + out , len - out ,
2013-07-04 02:00:51 +04:00
" %10s => Pid: %d Interval: %lu \n " , " Commit " ,
2009-06-20 01:45:55 +04:00
( osb - > commit_task ? task_pid_nr ( osb - > commit_task ) : - 1 ) ,
2013-07-04 02:00:51 +04:00
osb - > osb_commit_interval ) ;
2008-12-18 01:17:43 +03:00
out + = snprintf ( buf + out , len - out ,
2009-06-20 01:45:55 +04:00
" %10s => State: %d TxnId: %lu NumTxns: %d \n " ,
2008-12-18 01:17:43 +03:00
" Journal " , osb - > journal - > j_state ,
2009-06-20 01:45:55 +04:00
osb - > journal - > j_trans_id ,
atomic_read ( & osb - > journal - > j_num_trans ) ) ;
2008-12-18 01:17:43 +03:00
out + = snprintf ( buf + out , len - out ,
" %10s => GlobalAllocs: %d LocalAllocs: %d "
" SubAllocs: %d LAWinMoves: %d SAExtends: %d \n " ,
" Stats " ,
atomic_read ( & osb - > alloc_stats . bitmap_data ) ,
atomic_read ( & osb - > alloc_stats . local_data ) ,
atomic_read ( & osb - > alloc_stats . bg_allocs ) ,
atomic_read ( & osb - > alloc_stats . moves ) ,
atomic_read ( & osb - > alloc_stats . bg_extends ) ) ;
out + = snprintf ( buf + out , len - out ,
" %10s => State: %u Descriptor: %llu Size: %u bits "
" Default: %u bits \n " ,
" LocalAlloc " , osb - > local_alloc_state ,
( unsigned long long ) osb - > la_last_gd ,
osb - > local_alloc_bits , osb - > local_alloc_default_bits ) ;
spin_lock ( & osb - > osb_lock ) ;
out + = snprintf ( buf + out , len - out ,
2010-01-25 09:11:06 +03:00
" %10s => InodeSlot: %d StolenInodes: %d, "
" MetaSlot: %d StolenMeta: %d \n " , " Steal " ,
2008-12-18 01:17:43 +03:00
osb - > s_inode_steal_slot ,
2010-01-25 09:11:06 +03:00
atomic_read ( & osb - > s_num_inodes_stolen ) ,
osb - > s_meta_steal_slot ,
atomic_read ( & osb - > s_num_meta_stolen ) ) ;
2008-12-18 01:17:43 +03:00
spin_unlock ( & osb - > osb_lock ) ;
2009-06-22 22:40:07 +04:00
out + = snprintf ( buf + out , len - out , " OrphanScan => " ) ;
out + = snprintf ( buf + out , len - out , " Local: %u Global: %u " ,
os - > os_count , os - > os_seqno ) ;
out + = snprintf ( buf + out , len - out , " Last Scan: " ) ;
if ( atomic_read ( & os - > os_state ) = = ORPHAN_SCAN_INACTIVE )
out + = snprintf ( buf + out , len - out , " Disabled \n " ) ;
else
out + = snprintf ( buf + out , len - out , " %lu seconds ago \n " ,
( get_seconds ( ) - os - > os_scantime . tv_sec ) ) ;
2008-12-18 01:17:43 +03:00
out + = snprintf ( buf + out , len - out , " %10s => %3s %10s \n " ,
" Slots " , " Num " , " RecoGen " ) ;
for ( i = 0 ; i < osb - > max_slots ; + + i ) {
out + = snprintf ( buf + out , len - out ,
" %10s %c %3d %10d \n " ,
" " ,
( i = = osb - > slot_num ? ' * ' : ' ' ) ,
i , osb - > slot_recovery_generations [ i ] ) ;
}
return out ;
}
static int ocfs2_osb_debug_open ( struct inode * inode , struct file * file )
{
struct ocfs2_super * osb = inode - > i_private ;
char * buf = NULL ;
buf = kmalloc ( PAGE_SIZE , GFP_KERNEL ) ;
if ( ! buf )
goto bail ;
i_size_write ( inode , ocfs2_osb_dump ( osb , buf , PAGE_SIZE ) ) ;
file - > private_data = buf ;
return 0 ;
bail :
return - ENOMEM ;
}
static int ocfs2_debug_release ( struct inode * inode , struct file * file )
{
kfree ( file - > private_data ) ;
return 0 ;
}
/*
 * read() handler of the debugfs file: copy from the snapshot buffer in
 * file->private_data to user space, treating the inode size as the
 * number of bytes available in that buffer.
 */
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	struct inode *inode = file->f_mapping->host;
	loff_t avail = i_size_read(inode);

	return simple_read_from_buffer(buf, nbytes, ppos,
				       file->private_data, avail);
}
# else
/* Stub used when CONFIG_DEBUG_FS is not set: does nothing, returns 0. */
static int ocfs2_osb_debug_open(struct inode *inode, struct file *file)
{
	return 0;
}
/* Stub used when CONFIG_DEBUG_FS is not set: does nothing, returns 0. */
static int ocfs2_debug_release(struct inode *inode, struct file *file)
{
	return 0;
}
/* Stub used when CONFIG_DEBUG_FS is not set: returns 0 without copying anything. */
static ssize_t ocfs2_debug_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	return 0;
}
# endif /* CONFIG_DEBUG_FS */
2009-10-02 02:43:56 +04:00
/*
 * File operations of the per-volume debugfs file. open/release/read are
 * the ocfs2 handlers defined above (real ones with CONFIG_DEBUG_FS,
 * stubs returning 0 otherwise); seeking uses generic_file_llseek.
 */
static const struct file_operations ocfs2_osb_debug_fops = {
2008-12-18 01:17:43 +03:00
. open = ocfs2_osb_debug_open ,
. release = ocfs2_debug_release ,
. read = ocfs2_debug_read ,
. llseek = generic_file_llseek ,
} ;
2005-12-16 01:31:24 +03:00
static int ocfs2_sync_fs ( struct super_block * sb , int wait )
{
2007-09-26 22:10:04 +04:00
int status ;
2005-12-16 01:31:24 +03:00
tid_t target ;
struct ocfs2_super * osb = OCFS2_SB ( sb ) ;
if ( ocfs2_is_hard_readonly ( osb ) )
return - EROFS ;
if ( wait ) {
status = ocfs2_flush_truncate_log ( osb ) ;
if ( status < 0 )
mlog_errno ( status ) ;
} else {
ocfs2_schedule_truncate_log_flush ( osb , 0 ) ;
}
2008-09-04 07:03:41 +04:00
if ( jbd2_journal_start_commit ( OCFS2_SB ( sb ) - > journal - > j_journal ,
& target ) ) {
2005-12-16 01:31:24 +03:00
if ( wait )
2008-09-04 07:03:41 +04:00
jbd2_log_wait_commit ( OCFS2_SB ( sb ) - > journal - > j_journal ,
target ) ;
2005-12-16 01:31:24 +03:00
}
return 0 ;
}
2008-08-20 17:43:36 +04:00
static int ocfs2_need_system_inode ( struct ocfs2_super * osb , int ino )
{
if ( ! OCFS2_HAS_RO_COMPAT_FEATURE ( osb - > sb , OCFS2_FEATURE_RO_COMPAT_USRQUOTA )
& & ( ino = = USER_QUOTA_SYSTEM_INODE
| | ino = = LOCAL_USER_QUOTA_SYSTEM_INODE ) )
return 0 ;
if ( ! OCFS2_HAS_RO_COMPAT_FEATURE ( osb - > sb , OCFS2_FEATURE_RO_COMPAT_GRPQUOTA )
& & ( ino = = GROUP_QUOTA_SYSTEM_INODE
| | ino = = LOCAL_GROUP_QUOTA_SYSTEM_INODE ) )
return 0 ;
return 1 ;
}
2005-12-16 01:31:24 +03:00
static int ocfs2_init_global_system_inodes ( struct ocfs2_super * osb )
{
struct inode * new = NULL ;
int status = 0 ;
int i ;
2008-01-11 02:11:45 +03:00
new = ocfs2_iget ( osb , osb - > root_blkno , OCFS2_FI_FLAG_SYSFILE , 0 ) ;
2005-12-16 01:31:24 +03:00
if ( IS_ERR ( new ) ) {
status = PTR_ERR ( new ) ;
mlog_errno ( status ) ;
goto bail ;
}
osb - > root_inode = new ;
2008-01-11 02:11:45 +03:00
new = ocfs2_iget ( osb , osb - > system_dir_blkno , OCFS2_FI_FLAG_SYSFILE , 0 ) ;
2005-12-16 01:31:24 +03:00
if ( IS_ERR ( new ) ) {
status = PTR_ERR ( new ) ;
mlog_errno ( status ) ;
goto bail ;
}
osb - > sys_root_inode = new ;
for ( i = OCFS2_FIRST_ONLINE_SYSTEM_INODE ;
i < = OCFS2_LAST_GLOBAL_SYSTEM_INODE ; i + + ) {
2008-08-20 17:43:36 +04:00
if ( ! ocfs2_need_system_inode ( osb , i ) )
continue ;
2005-12-16 01:31:24 +03:00
new = ocfs2_get_system_file_inode ( osb , i , osb - > slot_num ) ;
if ( ! new ) {
ocfs2_release_system_inodes ( osb ) ;
status = - EINVAL ;
mlog_errno ( status ) ;
/* FIXME: Should ERROR_RO_FS */
mlog ( ML_ERROR , " Unable to load system inode %d, "
" possibly corrupt fs? " , i ) ;
goto bail ;
}
// the array now has one ref, so drop this one
iput ( new ) ;
}
bail :
2011-03-07 11:43:21 +03:00
if ( status )
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
return status ;
}
static int ocfs2_init_local_system_inodes ( struct ocfs2_super * osb )
{
struct inode * new = NULL ;
int status = 0 ;
int i ;
for ( i = OCFS2_LAST_GLOBAL_SYSTEM_INODE + 1 ;
i < NUM_SYSTEM_INODES ;
i + + ) {
2008-08-20 17:43:36 +04:00
if ( ! ocfs2_need_system_inode ( osb , i ) )
continue ;
2005-12-16 01:31:24 +03:00
new = ocfs2_get_system_file_inode ( osb , i , osb - > slot_num ) ;
if ( ! new ) {
ocfs2_release_system_inodes ( osb ) ;
status = - EINVAL ;
mlog ( ML_ERROR , " status=%d, sysfile=%d, slot=%d \n " ,
status , i , osb - > slot_num ) ;
goto bail ;
}
/* the array now has one ref, so drop this one */
iput ( new ) ;
}
bail :
2011-03-07 11:43:21 +03:00
if ( status )
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
return status ;
}
2007-09-26 22:10:04 +04:00
static void ocfs2_release_system_inodes ( struct ocfs2_super * osb )
2005-12-16 01:31:24 +03:00
{
2007-09-26 22:10:04 +04:00
int i ;
2005-12-16 01:31:24 +03:00
struct inode * inode ;
ocfs2: Cache system inodes of other slots.
Durring orphan scan, if we are slot 0, and we are replaying
orphan_dir:0001, the general process is that for every file
in this dir:
1. we will iget orphan_dir:0001, since there is no inode for it.
we will have to create an inode and read it from the disk.
2. do the normal work, such as delete_inode and remove it from
the dir if it is allowed.
3. call iput orphan_dir:0001 when we are done. In this case,
since we have no dcache for this inode, i_count will
reach 0, and VFS will have to call clear_inode and in
ocfs2_clear_inode we will checkpoint the inode which will let
ocfs2_cmt and journald begin to work.
4. We loop back to 1 for the next file.
So you see, actually for every deleted file, we have to read the
orphan dir from the disk and checkpoint the journal. It is very
time consuming and cause a lot of journal checkpoint I/O.
A better solution is that we can have another reference for these
inodes in ocfs2_super. So if there is no other race among
nodes(which will let dlmglue to checkpoint the inode), for step 3,
clear_inode won't be called and for step 1, we may only need to
read the inode for the 1st time. This is a big win for us.
So this patch will try to cache system inodes of other slots so
that we will have one more reference for these inodes and avoid
the extra inode read and journal checkpoint.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2010-08-16 12:58:21 +04:00
for ( i = 0 ; i < NUM_GLOBAL_SYSTEM_INODES ; i + + ) {
inode = osb - > global_system_inodes [ i ] ;
2005-12-16 01:31:24 +03:00
if ( inode ) {
iput ( inode ) ;
ocfs2: Cache system inodes of other slots.
Durring orphan scan, if we are slot 0, and we are replaying
orphan_dir:0001, the general process is that for every file
in this dir:
1. we will iget orphan_dir:0001, since there is no inode for it.
we will have to create an inode and read it from the disk.
2. do the normal work, such as delete_inode and remove it from
the dir if it is allowed.
3. call iput orphan_dir:0001 when we are done. In this case,
since we have no dcache for this inode, i_count will
reach 0, and VFS will have to call clear_inode and in
ocfs2_clear_inode we will checkpoint the inode which will let
ocfs2_cmt and journald begin to work.
4. We loop back to 1 for the next file.
So you see, actually for every deleted file, we have to read the
orphan dir from the disk and checkpoint the journal. It is very
time consuming and cause a lot of journal checkpoint I/O.
A better solution is that we can have another reference for these
inodes in ocfs2_super. So if there is no other race among
nodes(which will let dlmglue to checkpoint the inode), for step 3,
clear_inode won't be called and for step 1, we may only need to
read the inode for the 1st time. This is a big win for us.
So this patch will try to cache system inodes of other slots so
that we will have one more reference for these inodes and avoid
the extra inode read and journal checkpoint.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2010-08-16 12:58:21 +04:00
osb - > global_system_inodes [ i ] = NULL ;
2005-12-16 01:31:24 +03:00
}
}
inode = osb - > sys_root_inode ;
if ( inode ) {
iput ( inode ) ;
osb - > sys_root_inode = NULL ;
}
inode = osb - > root_inode ;
if ( inode ) {
iput ( inode ) ;
osb - > root_inode = NULL ;
}
ocfs2: Cache system inodes of other slots.
Durring orphan scan, if we are slot 0, and we are replaying
orphan_dir:0001, the general process is that for every file
in this dir:
1. we will iget orphan_dir:0001, since there is no inode for it.
we will have to create an inode and read it from the disk.
2. do the normal work, such as delete_inode and remove it from
the dir if it is allowed.
3. call iput orphan_dir:0001 when we are done. In this case,
since we have no dcache for this inode, i_count will
reach 0, and VFS will have to call clear_inode and in
ocfs2_clear_inode we will checkpoint the inode which will let
ocfs2_cmt and journald begin to work.
4. We loop back to 1 for the next file.
So you see, actually for every deleted file, we have to read the
orphan dir from the disk and checkpoint the journal. It is very
time consuming and cause a lot of journal checkpoint I/O.
A better solution is that we can have another reference for these
inodes in ocfs2_super. So if there is no other race among
nodes(which will let dlmglue to checkpoint the inode), for step 3,
clear_inode won't be called and for step 1, we may only need to
read the inode for the 1st time. This is a big win for us.
So this patch will try to cache system inodes of other slots so
that we will have one more reference for these inodes and avoid
the extra inode read and journal checkpoint.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2010-08-16 12:58:21 +04:00
if ( ! osb - > local_system_inodes )
2011-03-07 11:43:21 +03:00
return ;
ocfs2: Cache system inodes of other slots.
Durring orphan scan, if we are slot 0, and we are replaying
orphan_dir:0001, the general process is that for every file
in this dir:
1. we will iget orphan_dir:0001, since there is no inode for it.
we will have to create an inode and read it from the disk.
2. do the normal work, such as delete_inode and remove it from
the dir if it is allowed.
3. call iput orphan_dir:0001 when we are done. In this case,
since we have no dcache for this inode, i_count will
reach 0, and VFS will have to call clear_inode and in
ocfs2_clear_inode we will checkpoint the inode which will let
ocfs2_cmt and journald begin to work.
4. We loop back to 1 for the next file.
So you see, actually for every deleted file, we have to read the
orphan dir from the disk and checkpoint the journal. It is very
time consuming and cause a lot of journal checkpoint I/O.
A better solution is that we can have another reference for these
inodes in ocfs2_super. So if there is no other race among
nodes(which will let dlmglue to checkpoint the inode), for step 3,
clear_inode won't be called and for step 1, we may only need to
read the inode for the 1st time. This is a big win for us.
So this patch will try to cache system inodes of other slots so
that we will have one more reference for these inodes and avoid
the extra inode read and journal checkpoint.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Signed-off-by: Joel Becker <joel.becker@oracle.com>
2010-08-16 12:58:21 +04:00
for ( i = 0 ; i < NUM_LOCAL_SYSTEM_INODES * osb - > max_slots ; i + + ) {
if ( osb - > local_system_inodes [ i ] ) {
iput ( osb - > local_system_inodes [ i ] ) ;
osb - > local_system_inodes [ i ] = NULL ;
}
}
kfree ( osb - > local_system_inodes ) ;
osb - > local_system_inodes = NULL ;
2005-12-16 01:31:24 +03:00
}
/* We're allocating fs objects, use GFP_NOFS */
static struct inode * ocfs2_alloc_inode ( struct super_block * sb )
{
struct ocfs2_inode_info * oi ;
2006-12-07 07:33:14 +03:00
oi = kmem_cache_alloc ( ocfs2_inode_cachep , GFP_NOFS ) ;
2005-12-16 01:31:24 +03:00
if ( ! oi )
return NULL ;
2014-04-04 01:46:48 +04:00
oi - > i_sync_tid = 0 ;
oi - > i_datasync_tid = 0 ;
2014-09-29 17:02:51 +04:00
memset ( & oi - > i_dquot , 0 , sizeof ( oi - > i_dquot ) ) ;
2014-04-04 01:46:48 +04:00
2008-09-04 07:03:41 +04:00
jbd2_journal_init_jbd_inode ( & oi - > ip_jinode , & oi - > vfs_inode ) ;
2005-12-16 01:31:24 +03:00
return & oi - > vfs_inode ;
}
2011-01-07 09:49:49 +03:00
static void ocfs2_i_callback ( struct rcu_head * head )
2005-12-16 01:31:24 +03:00
{
2011-01-07 09:49:49 +03:00
struct inode * inode = container_of ( head , struct inode , i_rcu ) ;
2005-12-16 01:31:24 +03:00
kmem_cache_free ( ocfs2_inode_cachep , OCFS2_I ( inode ) ) ;
}
2011-01-07 09:49:49 +03:00
static void ocfs2_destroy_inode ( struct inode * inode )
{
call_rcu ( & inode - > i_rcu , ocfs2_i_callback ) ;
}
2007-07-20 23:56:16 +04:00
/*
 * Compute the largest file offset supported for the given geometry.
 *
 * @bbits: log2 of the block size
 * @cbits: log2 of the cluster size
 *
 * i_size and all block offsets in ocfs2 are always 64 bits wide.
 * i_clusters is 32 bits, in cluster-sized units.  So on 64 bit
 * platforms, cluster size will be the limiting factor and the result is
 * (cluster size << 32) - cluster size.
 *
 * On 32 bit platforms the result is further limited, see the
 * preprocessor branches below.
 *
 * The shifts use an unsigned 1 so that the value is computed in the
 * same unsigned type it is stored in.
 */
static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
						unsigned int cbits)
{
	unsigned int bytes = 1U << cbits;
	unsigned int trim = bytes;
	unsigned int bitshift = 32;

#if BITS_PER_LONG == 32
# if defined(CONFIG_LBDAF)
	BUILD_BUG_ON(sizeof(sector_t) != 8);
	/*
	 * We might be limited by page cache size.
	 */
	if (bytes > PAGE_CACHE_SIZE) {
		bytes = PAGE_CACHE_SIZE;
		trim = 1;
		/*
		 * Shift by 31 here so that we don't get larger than
		 * MAX_LFS_FILESIZE
		 */
		bitshift = 31;
	}
# else
	/*
	 * We are limited by the size of sector_t. Use block size, as
	 * that's what we expose to the VFS.
	 */
	bytes = 1U << bbits;
	trim = 1;
	bitshift = 31;
# endif
#endif

	/*
	 * Trim by a whole cluster when we can actually approach the
	 * on-disk limits. Otherwise we can overflow i_clusters when
	 * an extent start is at the max offset.
	 */
	return (((unsigned long long)bytes) << bitshift) - trim;
}
static int ocfs2_remount ( struct super_block * sb , int * flags , char * data )
{
int incompat_features ;
int ret = 0 ;
2007-09-07 20:16:10 +04:00
struct mount_options parsed_options ;
2005-12-16 01:31:24 +03:00
struct ocfs2_super * osb = OCFS2_SB ( sb ) ;
2010-10-08 02:23:50 +04:00
u32 tmp ;
2005-12-16 01:31:24 +03:00
2014-03-13 18:14:33 +04:00
sync_filesystem ( sb ) ;
2009-10-15 16:54:04 +04:00
if ( ! ocfs2_parse_options ( sb , data , & parsed_options , 1 ) | |
! ocfs2_check_set_options ( sb , & parsed_options ) ) {
2005-12-16 01:31:24 +03:00
ret = - EINVAL ;
goto out ;
}
2010-10-08 02:23:50 +04:00
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE ;
if ( ( osb - > s_mount_opt & tmp ) ! = ( parsed_options . mount_opt & tmp ) ) {
2005-12-16 01:31:24 +03:00
ret = - EINVAL ;
mlog ( ML_ERROR , " Cannot change heartbeat mode on remount \n " ) ;
goto out ;
}
if ( ( osb - > s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ) ! =
2007-09-07 20:16:10 +04:00
( parsed_options . mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ) ) {
2005-12-16 01:31:24 +03:00
ret = - EINVAL ;
mlog ( ML_ERROR , " Cannot change data mode on remount \n " ) ;
goto out ;
}
2008-09-04 07:03:40 +04:00
/* Probably don't want this on remount; it might
* mess with other nodes */
if ( ! ( osb - > s_mount_opt & OCFS2_MOUNT_INODE64 ) & &
( parsed_options . mount_opt & OCFS2_MOUNT_INODE64 ) ) {
ret = - EINVAL ;
mlog ( ML_ERROR , " Cannot enable inode64 on remount \n " ) ;
goto out ;
}
2005-12-16 01:31:24 +03:00
/* We're going to/from readonly mode. */
if ( ( * flags & MS_RDONLY ) ! = ( sb - > s_flags & MS_RDONLY ) ) {
2008-08-21 22:13:17 +04:00
/* Disable quota accounting before remounting RO */
if ( * flags & MS_RDONLY ) {
ret = ocfs2_susp_quotas ( osb , 0 ) ;
if ( ret < 0 )
goto out ;
}
2005-12-16 01:31:24 +03:00
/* Lock here so the check of HARD_RO and the potential
* setting of SOFT_RO is atomic . */
spin_lock ( & osb - > osb_lock ) ;
if ( osb - > osb_flags & OCFS2_OSB_HARD_RO ) {
mlog ( ML_ERROR , " Remount on readonly device is forbidden. \n " ) ;
ret = - EROFS ;
goto unlock_osb ;
}
if ( * flags & MS_RDONLY ) {
sb - > s_flags | = MS_RDONLY ;
osb - > osb_flags | = OCFS2_OSB_SOFT_RO ;
} else {
if ( osb - > osb_flags & OCFS2_OSB_ERROR_FS ) {
mlog ( ML_ERROR , " Cannot remount RDWR "
" filesystem due to previous errors. \n " ) ;
ret = - EROFS ;
goto unlock_osb ;
}
incompat_features = OCFS2_HAS_RO_COMPAT_FEATURE ( sb , ~ OCFS2_FEATURE_RO_COMPAT_SUPP ) ;
if ( incompat_features ) {
mlog ( ML_ERROR , " Cannot remount RDWR because "
" of unsupported optional features "
" (%x). \n " , incompat_features ) ;
ret = - EINVAL ;
goto unlock_osb ;
}
sb - > s_flags & = ~ MS_RDONLY ;
osb - > osb_flags & = ~ OCFS2_OSB_SOFT_RO ;
}
2011-02-23 16:29:08 +03:00
trace_ocfs2_remount ( sb - > s_flags , osb - > osb_flags , * flags ) ;
2005-12-16 01:31:24 +03:00
unlock_osb :
spin_unlock ( & osb - > osb_lock ) ;
2008-08-21 22:13:17 +04:00
/* Enable quota accounting after remounting RW */
if ( ! ret & & ! ( * flags & MS_RDONLY ) ) {
if ( sb_any_quota_suspended ( sb ) )
ret = ocfs2_susp_quotas ( osb , 1 ) ;
else
ret = ocfs2_enable_quotas ( osb ) ;
if ( ret < 0 ) {
/* Return back changes... */
spin_lock ( & osb - > osb_lock ) ;
sb - > s_flags | = MS_RDONLY ;
osb - > osb_flags | = OCFS2_OSB_SOFT_RO ;
spin_unlock ( & osb - > osb_lock ) ;
goto out ;
}
}
2005-12-16 01:31:24 +03:00
}
if ( ! ret ) {
/* Only save off the new mount options in case of a successful
* remount . */
2007-09-07 20:16:10 +04:00
osb - > s_mount_opt = parsed_options . mount_opt ;
osb - > s_atime_quantum = parsed_options . atime_quantum ;
osb - > preferred_slot = parsed_options . slot ;
2007-11-08 01:40:36 +03:00
if ( parsed_options . commit_interval )
osb - > osb_commit_interval = parsed_options . commit_interval ;
2007-11-08 01:21:45 +03:00
if ( ! ocfs2_is_hard_readonly ( osb ) )
ocfs2_set_journal_params ( osb ) ;
2009-10-15 16:54:05 +04:00
sb - > s_flags = ( sb - > s_flags & ~ MS_POSIXACL ) |
( ( osb - > s_mount_opt & OCFS2_MOUNT_POSIX_ACL ) ?
MS_POSIXACL : 0 ) ;
2005-12-16 01:31:24 +03:00
}
out :
return ret ;
}
static int ocfs2_sb_probe ( struct super_block * sb ,
struct buffer_head * * bh ,
2009-01-07 01:57:08 +03:00
int * sector_size ,
struct ocfs2_blockcheck_stats * stats )
2005-12-16 01:31:24 +03:00
{
2007-09-26 22:10:04 +04:00
int status , tmpstat ;
2005-12-16 01:31:24 +03:00
struct ocfs1_vol_disk_hdr * hdr ;
struct ocfs2_dinode * di ;
int blksize ;
* bh = NULL ;
/* may be > 512 */
2009-05-23 01:17:49 +04:00
* sector_size = bdev_logical_block_size ( sb - > s_bdev ) ;
2005-12-16 01:31:24 +03:00
if ( * sector_size > OCFS2_MAX_BLOCKSIZE ) {
mlog ( ML_ERROR , " Hardware sector size too large: %d (max=%d) \n " ,
* sector_size , OCFS2_MAX_BLOCKSIZE ) ;
status = - EINVAL ;
goto bail ;
}
/* Can this really happen? */
if ( * sector_size < OCFS2_MIN_BLOCKSIZE )
* sector_size = OCFS2_MIN_BLOCKSIZE ;
/* check block zero for old format */
status = ocfs2_get_sector ( sb , bh , 0 , * sector_size ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
hdr = ( struct ocfs1_vol_disk_hdr * ) ( * bh ) - > b_data ;
if ( hdr - > major_version = = OCFS1_MAJOR_VERSION ) {
mlog ( ML_ERROR , " incompatible version: %u.%u \n " ,
hdr - > major_version , hdr - > minor_version ) ;
status = - EINVAL ;
}
if ( memcmp ( hdr - > signature , OCFS1_VOLUME_SIGNATURE ,
strlen ( OCFS1_VOLUME_SIGNATURE ) ) = = 0 ) {
mlog ( ML_ERROR , " incompatible volume signature: %8s \n " ,
hdr - > signature ) ;
status = - EINVAL ;
}
brelse ( * bh ) ;
* bh = NULL ;
if ( status < 0 ) {
mlog ( ML_ERROR , " This is an ocfs v1 filesystem which must be "
" upgraded before mounting with ocfs v2 \n " ) ;
goto bail ;
}
/*
* Now check at magic offset for 512 , 1024 , 2048 , 4096
* blocksizes . 4096 is the maximum blocksize because it is
* the minimum clustersize .
*/
status = - EINVAL ;
for ( blksize = * sector_size ;
blksize < = OCFS2_MAX_BLOCKSIZE ;
blksize < < = 1 ) {
tmpstat = ocfs2_get_sector ( sb , bh ,
OCFS2_SUPER_BLOCK_BLKNO ,
blksize ) ;
if ( tmpstat < 0 ) {
status = tmpstat ;
mlog_errno ( status ) ;
2009-10-29 08:28:24 +03:00
break ;
2005-12-16 01:31:24 +03:00
}
di = ( struct ocfs2_dinode * ) ( * bh ) - > b_data ;
2009-01-07 01:57:08 +03:00
memset ( stats , 0 , sizeof ( struct ocfs2_blockcheck_stats ) ) ;
2009-07-22 15:17:19 +04:00
spin_lock_init ( & stats - > b_lock ) ;
2009-10-29 08:28:24 +03:00
tmpstat = ocfs2_verify_volume ( di , * bh , blksize , stats ) ;
if ( tmpstat < 0 ) {
brelse ( * bh ) ;
* bh = NULL ;
}
if ( tmpstat ! = - EAGAIN ) {
status = tmpstat ;
2005-12-16 01:31:24 +03:00
break ;
2009-10-29 08:28:24 +03:00
}
2005-12-16 01:31:24 +03:00
}
bail :
return status ;
}
2006-12-06 04:56:35 +03:00
static int ocfs2_verify_heartbeat ( struct ocfs2_super * osb )
{
2010-10-08 02:23:50 +04:00
u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL ;
if ( osb - > s_mount_opt & hb_enabled ) {
if ( ocfs2_mount_local ( osb ) ) {
2006-12-06 04:56:35 +03:00
mlog ( ML_ERROR , " Cannot heartbeat on a locally "
" mounted device. \n " ) ;
return - EINVAL ;
}
2010-10-08 02:23:50 +04:00
if ( ocfs2_userspace_stack ( osb ) ) {
2008-02-02 02:08:23 +03:00
mlog ( ML_ERROR , " Userspace stack expected, but "
" o2cb heartbeat arguments passed to mount \n " ) ;
return - EINVAL ;
}
2010-10-08 02:23:50 +04:00
if ( ( ( osb - > s_mount_opt & OCFS2_MOUNT_HB_GLOBAL ) & &
! ocfs2_cluster_o2cb_global_heartbeat ( osb ) ) | |
( ( osb - > s_mount_opt & OCFS2_MOUNT_HB_LOCAL ) & &
ocfs2_cluster_o2cb_global_heartbeat ( osb ) ) ) {
mlog ( ML_ERROR , " Mismatching o2cb heartbeat modes \n " ) ;
return - EINVAL ;
}
2008-02-02 02:08:23 +03:00
}
2010-10-08 02:23:50 +04:00
if ( ! ( osb - > s_mount_opt & hb_enabled ) ) {
2008-02-02 02:08:23 +03:00
if ( ! ocfs2_mount_local ( osb ) & & ! ocfs2_is_hard_readonly ( osb ) & &
! ocfs2_userspace_stack ( osb ) ) {
2006-12-06 04:56:35 +03:00
mlog ( ML_ERROR , " Heartbeat has to be started to mount "
" a read-write clustered device. \n " ) ;
return - EINVAL ;
}
}
return 0 ;
}
2008-02-02 02:08:23 +03:00
/*
* If we ' re using a userspace stack , mount should have passed
* a name that matches the disk . If not , mount should not
* have passed a stack .
*/
static int ocfs2_verify_userspace_stack ( struct ocfs2_super * osb ,
struct mount_options * mopt )
{
if ( ! ocfs2_userspace_stack ( osb ) & & mopt - > cluster_stack [ 0 ] ) {
mlog ( ML_ERROR ,
" cluster stack passed to mount, but this filesystem "
" does not support it \n " ) ;
return - EINVAL ;
}
if ( ocfs2_userspace_stack ( osb ) & &
strncmp ( osb - > osb_cluster_stack , mopt - > cluster_stack ,
OCFS2_STACK_LABEL_LEN ) ) {
mlog ( ML_ERROR ,
" cluster stack passed to mount ( \" %s \" ) does not "
" match the filesystem ( \" %s \" ) \n " ,
mopt - > cluster_stack ,
osb - > osb_cluster_stack ) ;
return - EINVAL ;
}
return 0 ;
}
2008-08-21 22:13:17 +04:00
static int ocfs2_susp_quotas ( struct ocfs2_super * osb , int unsuspend )
{
int type ;
struct super_block * sb = osb - > sb ;
2014-09-10 23:06:39 +04:00
unsigned int feature [ OCFS2_MAXQUOTAS ] = {
OCFS2_FEATURE_RO_COMPAT_USRQUOTA ,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA } ;
2008-08-21 22:13:17 +04:00
int status = 0 ;
2014-09-10 23:06:39 +04:00
for ( type = 0 ; type < OCFS2_MAXQUOTAS ; type + + ) {
2008-08-21 22:13:17 +04:00
if ( ! OCFS2_HAS_RO_COMPAT_FEATURE ( sb , feature [ type ] ) )
continue ;
if ( unsuspend )
2010-05-19 15:16:41 +04:00
status = dquot_resume ( sb , type ) ;
2010-05-14 00:14:53 +04:00
else {
struct ocfs2_mem_dqinfo * oinfo ;
/* Cancel periodic syncing before suspending */
oinfo = sb_dqinfo ( sb , type ) - > dqi_priv ;
cancel_delayed_work_sync ( & oinfo - > dqi_sync_work ) ;
2010-05-19 15:16:41 +04:00
status = dquot_suspend ( sb , type ) ;
2010-05-14 00:14:53 +04:00
}
2008-08-21 22:13:17 +04:00
if ( status < 0 )
break ;
}
if ( status < 0 )
mlog ( ML_ERROR , " Failed to suspend/unsuspend quotas on "
" remount (error = %d). \n " , status ) ;
return status ;
}
static int ocfs2_enable_quotas ( struct ocfs2_super * osb )
{
2014-09-10 23:06:39 +04:00
struct inode * inode [ OCFS2_MAXQUOTAS ] = { NULL , NULL } ;
2008-08-21 22:13:17 +04:00
struct super_block * sb = osb - > sb ;
2014-09-10 23:06:39 +04:00
unsigned int feature [ OCFS2_MAXQUOTAS ] = {
OCFS2_FEATURE_RO_COMPAT_USRQUOTA ,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA } ;
unsigned int ino [ OCFS2_MAXQUOTAS ] = {
LOCAL_USER_QUOTA_SYSTEM_INODE ,
2008-08-21 22:13:17 +04:00
LOCAL_GROUP_QUOTA_SYSTEM_INODE } ;
int status ;
int type ;
sb_dqopt ( sb ) - > flags | = DQUOT_QUOTA_SYS_FILE | DQUOT_NEGATIVE_USAGE ;
2014-09-10 23:06:39 +04:00
for ( type = 0 ; type < OCFS2_MAXQUOTAS ; type + + ) {
2008-08-21 22:13:17 +04:00
if ( ! OCFS2_HAS_RO_COMPAT_FEATURE ( sb , feature [ type ] ) )
continue ;
inode [ type ] = ocfs2_get_system_file_inode ( osb , ino [ type ] ,
osb - > slot_num ) ;
if ( ! inode [ type ] ) {
status = - ENOENT ;
goto out_quota_off ;
}
2010-05-19 15:16:45 +04:00
status = dquot_enable ( inode [ type ] , type , QFMT_OCFS2 ,
DQUOT_USAGE_ENABLED ) ;
2008-08-21 22:13:17 +04:00
if ( status < 0 )
goto out_quota_off ;
}
2014-09-10 23:06:39 +04:00
for ( type = 0 ; type < OCFS2_MAXQUOTAS ; type + + )
2008-08-21 22:13:17 +04:00
iput ( inode [ type ] ) ;
return 0 ;
out_quota_off :
ocfs2_disable_quotas ( osb ) ;
2014-09-10 23:06:39 +04:00
for ( type = 0 ; type < OCFS2_MAXQUOTAS ; type + + )
2008-08-21 22:13:17 +04:00
iput ( inode [ type ] ) ;
mlog_errno ( status ) ;
return status ;
}
static void ocfs2_disable_quotas ( struct ocfs2_super * osb )
{
int type ;
struct inode * inode ;
struct super_block * sb = osb - > sb ;
2010-05-14 00:14:53 +04:00
struct ocfs2_mem_dqinfo * oinfo ;
2008-08-21 22:13:17 +04:00
/* We mostly ignore errors in this function because there's not much
* we can do when we see them */
2014-09-10 23:06:39 +04:00
for ( type = 0 ; type < OCFS2_MAXQUOTAS ; type + + ) {
2008-08-21 22:13:17 +04:00
if ( ! sb_has_quota_loaded ( sb , type ) )
continue ;
2010-05-14 00:14:53 +04:00
/* Cancel periodic syncing before we grab dqonoff_mutex */
oinfo = sb_dqinfo ( sb , type ) - > dqi_priv ;
cancel_delayed_work_sync ( & oinfo - > dqi_sync_work ) ;
2008-08-21 22:13:17 +04:00
inode = igrab ( sb - > s_dquot . files [ type ] ) ;
/* Turn off quotas. This will remove all dquot structures from
* memory and so they will be automatically synced to global
* quota files */
2010-05-19 15:16:41 +04:00
dquot_disable ( sb , type , DQUOT_USAGE_ENABLED |
DQUOT_LIMITS_ENABLED ) ;
2008-08-21 22:13:17 +04:00
if ( ! inode )
continue ;
iput ( inode ) ;
}
}
2005-12-16 01:31:24 +03:00
static int ocfs2_fill_super ( struct super_block * sb , void * data , int silent )
{
struct dentry * root ;
int status , sector_size ;
2007-09-07 20:16:10 +04:00
struct mount_options parsed_options ;
2005-12-16 01:31:24 +03:00
struct inode * inode = NULL ;
struct ocfs2_super * osb = NULL ;
struct buffer_head * bh = NULL ;
2013-08-29 03:35:21 +04:00
char nodestr [ 12 ] ;
2009-01-07 01:57:08 +03:00
struct ocfs2_blockcheck_stats stats ;
2005-12-16 01:31:24 +03:00
2011-02-23 16:29:08 +03:00
trace_ocfs2_fill_super ( sb , data , silent ) ;
2005-12-16 01:31:24 +03:00
2007-09-07 20:16:10 +04:00
if ( ! ocfs2_parse_options ( sb , data , & parsed_options , 0 ) ) {
2005-12-16 01:31:24 +03:00
status = - EINVAL ;
goto read_super_error ;
}
/* probe for superblock */
2009-01-07 01:57:08 +03:00
status = ocfs2_sb_probe ( sb , & bh , & sector_size , & stats ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
mlog ( ML_ERROR , " superblock probe failed! \n " ) ;
goto read_super_error ;
}
2009-01-07 01:57:08 +03:00
status = ocfs2_initialize_super ( sb , bh , sector_size , & stats ) ;
2005-12-16 01:31:24 +03:00
osb = OCFS2_SB ( sb ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto read_super_error ;
}
brelse ( bh ) ;
bh = NULL ;
2008-11-14 06:17:52 +03:00
2009-10-15 16:54:04 +04:00
if ( ! ocfs2_check_set_options ( sb , & parsed_options ) ) {
status = - EINVAL ;
goto read_super_error ;
}
2007-09-07 20:16:10 +04:00
osb - > s_mount_opt = parsed_options . mount_opt ;
osb - > s_atime_quantum = parsed_options . atime_quantum ;
osb - > preferred_slot = parsed_options . slot ;
2007-11-08 01:40:36 +03:00
osb - > osb_commit_interval = parsed_options . commit_interval ;
2010-04-06 05:17:13 +04:00
ocfs2_la_set_sizes ( osb , parsed_options . localalloc_opt ) ;
2009-12-08 00:10:48 +03:00
osb - > osb_resv_level = parsed_options . resv_level ;
2010-04-06 05:17:16 +04:00
osb - > osb_dir_resv_level = parsed_options . resv_level ;
if ( parsed_options . dir_resv_level = = - 1 )
osb - > osb_dir_resv_level = parsed_options . resv_level ;
else
osb - > osb_dir_resv_level = parsed_options . dir_resv_level ;
2005-12-16 01:31:24 +03:00
2008-02-02 02:08:23 +03:00
status = ocfs2_verify_userspace_stack ( osb , & parsed_options ) ;
if ( status )
goto read_super_error ;
2005-12-16 01:31:24 +03:00
sb - > s_magic = OCFS2_SUPER_MAGIC ;
2011-06-04 02:24:58 +04:00
sb - > s_flags = ( sb - > s_flags & ~ ( MS_POSIXACL | MS_NOSEC ) ) |
2008-11-14 06:17:52 +03:00
( ( osb - > s_mount_opt & OCFS2_MOUNT_POSIX_ACL ) ? MS_POSIXACL : 0 ) ;
2005-12-16 01:31:24 +03:00
/* Hard readonly mode only if: bdev_read_only, MS_RDONLY,
* heartbeat = none */
if ( bdev_read_only ( sb - > s_bdev ) ) {
if ( ! ( sb - > s_flags & MS_RDONLY ) ) {
status = - EACCES ;
mlog ( ML_ERROR , " Readonly device detected but readonly "
" mount was not specified. \n " ) ;
goto read_super_error ;
}
/* You should not be able to start a local heartbeat
* on a readonly device . */
if ( osb - > s_mount_opt & OCFS2_MOUNT_HB_LOCAL ) {
status = - EROFS ;
mlog ( ML_ERROR , " Local heartbeat specified on readonly "
" device. \n " ) ;
goto read_super_error ;
}
status = ocfs2_check_journals_nolocks ( osb ) ;
if ( status < 0 ) {
if ( status = = - EROFS )
mlog ( ML_ERROR , " Recovery required on readonly "
" file system, but write access is "
" unavailable. \n " ) ;
else
2010-01-26 03:57:38 +03:00
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
goto read_super_error ;
}
ocfs2_set_ro_flag ( osb , 1 ) ;
2011-07-24 21:34:54 +04:00
printk ( KERN_NOTICE " ocfs2: Readonly device (%s) detected. "
" Cluster services will not be used for this mount. "
" Recovery will be skipped. \n " , osb - > dev_str ) ;
2005-12-16 01:31:24 +03:00
}
if ( ! ocfs2_is_hard_readonly ( osb ) ) {
if ( sb - > s_flags & MS_RDONLY )
ocfs2_set_ro_flag ( osb , 0 ) ;
}
2006-12-06 04:56:35 +03:00
status = ocfs2_verify_heartbeat ( osb ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto read_super_error ;
}
2005-12-16 01:31:24 +03:00
osb - > osb_debug_root = debugfs_create_dir ( osb - > uuid_str ,
ocfs2_debugfs_root ) ;
Revert "ocfs2: incorrect check for debugfs returns"
This reverts commit e2ac55b6a8e337fac7cc59c6f452caac92ab5ee6.
Huang Ying reports that this causes a hang at boot with debugfs disabled.
It is true that the debugfs error checks are kind of confusing, and this
code certainly merits more cleanup and thinking about it, but there's
something wrong with the trivial "check not just for NULL, but for error
pointers too" patch.
Yes, with debugfs disabled, we will end up setting the o2hb_debug_dir
pointer variable to an error pointer (-ENODEV), and then continue as if
everything was fine. But since debugfs is disabled, all the _users_ of
that pointer end up being compiled away, so even though the pointer can
not be dereferenced, that's still fine.
So it's confusing and somewhat questionable, but the "more correct"
error checks end up causing more trouble than they fix.
Reported-by: Huang Ying <ying.huang@intel.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Chengyu Song <csong84@gatech.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-21 19:17:28 +03:00
if ( ! osb - > osb_debug_root ) {
2005-12-16 01:31:24 +03:00
status = - EINVAL ;
mlog ( ML_ERROR , " Unable to create per-mount debugfs root. \n " ) ;
goto read_super_error ;
}
2008-12-18 01:17:43 +03:00
osb - > osb_ctxt = debugfs_create_file ( " fs_state " , S_IFREG | S_IRUSR ,
osb - > osb_debug_root ,
osb ,
& ocfs2_osb_debug_fops ) ;
Revert "ocfs2: incorrect check for debugfs returns"
This reverts commit e2ac55b6a8e337fac7cc59c6f452caac92ab5ee6.
Huang Ying reports that this causes a hang at boot with debugfs disabled.
It is true that the debugfs error checks are kind of confusing, and this
code certainly merits more cleanup and thinking about it, but there's
something wrong with the trivial "check not just for NULL, but for error
pointers too" patch.
Yes, with debugfs disabled, we will end up setting the o2hb_debug_dir
pointer variable to an error pointer (-ENODEV), and then continue as if
everything was fine. But since debugfs is disabled, all the _users_ of
that pointer end up being compiled away, so even though the pointer can
not be dereferenced, that's still fine.
So it's confusing and somewhat questionable, but the "more correct"
error checks end up causing more trouble than they fix.
Reported-by: Huang Ying <ying.huang@intel.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Chengyu Song <csong84@gatech.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-04-21 19:17:28 +03:00
if ( ! osb - > osb_ctxt ) {
2008-12-18 01:17:43 +03:00
status = - EINVAL ;
mlog_errno ( status ) ;
goto read_super_error ;
}
2009-01-07 01:57:08 +03:00
if ( ocfs2_meta_ecc ( osb ) ) {
status = ocfs2_blockcheck_stats_debugfs_install (
& osb - > osb_ecc_stats ,
osb - > osb_debug_root ) ;
if ( status ) {
mlog ( ML_ERROR ,
" Unable to create blockcheck statistics "
" files \n " ) ;
goto read_super_error ;
}
}
2005-12-16 01:31:24 +03:00
status = ocfs2_mount_volume ( sb ) ;
if ( status < 0 )
goto read_super_error ;
2012-02-13 06:46:49 +04:00
if ( osb - > root_inode )
inode = igrab ( osb - > root_inode ) ;
2005-12-16 01:31:24 +03:00
if ( ! inode ) {
status = - EIO ;
mlog_errno ( status ) ;
goto read_super_error ;
}
2012-01-09 07:15:13 +04:00
root = d_make_root ( inode ) ;
2005-12-16 01:31:24 +03:00
if ( ! root ) {
status = - ENOMEM ;
mlog_errno ( status ) ;
goto read_super_error ;
}
sb - > s_root = root ;
ocfs2_complete_mount_recovery ( osb ) ;
2006-12-06 04:56:35 +03:00
if ( ocfs2_mount_local ( osb ) )
snprintf ( nodestr , sizeof ( nodestr ) , " local " ) ;
else
2008-01-31 02:38:24 +03:00
snprintf ( nodestr , sizeof ( nodestr ) , " %u " , osb - > node_num ) ;
2006-12-06 04:56:35 +03:00
printk ( KERN_INFO " ocfs2: Mounting device (%s) on (node %s, slot %d) "
2006-04-28 03:41:31 +04:00
" with %s data mode. \n " ,
2006-12-06 04:56:35 +03:00
osb - > dev_str , nodestr , osb - > slot_num ,
2005-12-16 01:31:24 +03:00
osb - > s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? " writeback " :
" ordered " ) ;
atomic_set ( & osb - > vol_state , VOLUME_MOUNTED ) ;
wake_up ( & osb - > osb_mount_event ) ;
2008-08-21 22:13:17 +04:00
/* Now we can initialize quotas because we can afford to wait
* for cluster locks recovery now . That also means that truncation
* log recovery can happen but that waits for proper quota setup */
if ( ! ( sb - > s_flags & MS_RDONLY ) ) {
status = ocfs2_enable_quotas ( osb ) ;
if ( status < 0 ) {
/* We have to err-out specially here because
* s_root is already set */
mlog_errno ( status ) ;
atomic_set ( & osb - > vol_state , VOLUME_DISABLED ) ;
wake_up ( & osb - > osb_mount_event ) ;
return status ;
}
}
ocfs2_complete_quota_recovery ( osb ) ;
/* Now we wake up again for processes waiting for quotas */
atomic_set ( & osb - > vol_state , VOLUME_MOUNTED_QUOTAS ) ;
wake_up ( & osb - > osb_mount_event ) ;
2009-06-22 22:40:07 +04:00
/* Start this when the mount is almost sure of being successful */
2009-07-08 01:22:12 +04:00
ocfs2_orphan_scan_start ( osb ) ;
2009-06-22 22:40:07 +04:00
2005-12-16 01:31:24 +03:00
return status ;
read_super_error :
2008-10-08 01:25:16 +04:00
brelse ( bh ) ;
2005-12-16 01:31:24 +03:00
if ( osb ) {
atomic_set ( & osb - > vol_state , VOLUME_DISABLED ) ;
wake_up ( & osb - > osb_mount_event ) ;
ocfs2_dismount_volume ( sb , 1 ) ;
}
2011-03-07 11:43:21 +03:00
if ( status )
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
return status ;
}
2010-07-25 00:46:55 +04:00
static struct dentry * ocfs2_mount ( struct file_system_type * fs_type ,
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 13:02:57 +04:00
int flags ,
const char * dev_name ,
2010-07-25 00:46:55 +04:00
void * data )
2005-12-16 01:31:24 +03:00
{
2010-07-25 00:46:55 +04:00
return mount_bdev ( fs_type , flags , dev_name , data , ocfs2_fill_super ) ;
2005-12-16 01:31:24 +03:00
}
static struct file_system_type ocfs2_fs_type = {
. owner = THIS_MODULE ,
. name = " ocfs2 " ,
2010-07-25 00:46:55 +04:00
. mount = ocfs2_mount ,
2014-04-04 01:46:59 +04:00
. kill_sb = kill_block_super ,
2006-09-09 01:22:54 +04:00
. fs_flags = FS_REQUIRES_DEV | FS_RENAME_DOES_D_MOVE ,
2005-12-16 01:31:24 +03:00
. next = NULL
} ;
2013-03-07 13:08:55 +04:00
MODULE_ALIAS_FS ( " ocfs2 " ) ;
2005-12-16 01:31:24 +03:00
2009-10-15 16:54:04 +04:00
static int ocfs2_check_set_options ( struct super_block * sb ,
struct mount_options * options )
{
if ( options - > mount_opt & OCFS2_MOUNT_USRQUOTA & &
! OCFS2_HAS_RO_COMPAT_FEATURE ( sb ,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA ) ) {
mlog ( ML_ERROR , " User quotas were requested, but this "
" filesystem does not have the feature enabled. \n " ) ;
return 0 ;
}
if ( options - > mount_opt & OCFS2_MOUNT_GRPQUOTA & &
! OCFS2_HAS_RO_COMPAT_FEATURE ( sb ,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA ) ) {
mlog ( ML_ERROR , " Group quotas were requested, but this "
" filesystem does not have the feature enabled. \n " ) ;
return 0 ;
}
if ( options - > mount_opt & OCFS2_MOUNT_POSIX_ACL & &
! OCFS2_HAS_INCOMPAT_FEATURE ( sb , OCFS2_FEATURE_INCOMPAT_XATTR ) ) {
mlog ( ML_ERROR , " ACL support requested but extended attributes "
" feature is not enabled \n " ) ;
return 0 ;
}
/* No ACL setting specified? Use XATTR feature... */
if ( ! ( options - > mount_opt & ( OCFS2_MOUNT_POSIX_ACL |
OCFS2_MOUNT_NO_POSIX_ACL ) ) ) {
if ( OCFS2_HAS_INCOMPAT_FEATURE ( sb , OCFS2_FEATURE_INCOMPAT_XATTR ) )
options - > mount_opt | = OCFS2_MOUNT_POSIX_ACL ;
else
options - > mount_opt | = OCFS2_MOUNT_NO_POSIX_ACL ;
}
return 1 ;
}
2005-12-16 01:31:24 +03:00
static int ocfs2_parse_options ( struct super_block * sb ,
char * options ,
2007-09-07 20:16:10 +04:00
struct mount_options * mopt ,
2005-12-16 01:31:24 +03:00
int is_remount )
{
2011-01-31 22:31:04 +03:00
int status , user_stack = 0 ;
2005-12-16 01:31:24 +03:00
char * p ;
2010-10-08 02:23:50 +04:00
u32 tmp ;
2016-01-15 02:17:09 +03:00
int token , option ;
substring_t args [ MAX_OPT_ARGS ] ;
2005-12-16 01:31:24 +03:00
2011-02-23 16:29:08 +03:00
trace_ocfs2_parse_options ( is_remount , options ? options : " (none) " ) ;
2005-12-16 01:31:24 +03:00
2007-11-08 01:40:36 +03:00
mopt - > commit_interval = 0 ;
2010-04-14 05:00:31 +04:00
mopt - > mount_opt = OCFS2_MOUNT_NOINTR ;
2007-09-07 20:16:10 +04:00
mopt - > atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM ;
mopt - > slot = OCFS2_INVALID_SLOT ;
2010-04-06 05:17:13 +04:00
mopt - > localalloc_opt = - 1 ;
2008-02-02 02:08:23 +03:00
mopt - > cluster_stack [ 0 ] = ' \0 ' ;
2009-12-08 00:10:48 +03:00
mopt - > resv_level = OCFS2_DEFAULT_RESV_LEVEL ;
2010-04-06 05:17:16 +04:00
mopt - > dir_resv_level = - 1 ;
2005-12-16 01:31:24 +03:00
if ( ! options ) {
status = 1 ;
goto bail ;
}
while ( ( p = strsep ( & options , " , " ) ) ! = NULL ) {
if ( ! * p )
continue ;
token = match_token ( p , tokens , args ) ;
switch ( token ) {
case Opt_hb_local :
2007-09-07 20:16:10 +04:00
mopt - > mount_opt | = OCFS2_MOUNT_HB_LOCAL ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_hb_none :
2010-10-08 02:23:50 +04:00
mopt - > mount_opt | = OCFS2_MOUNT_HB_NONE ;
break ;
case Opt_hb_global :
mopt - > mount_opt | = OCFS2_MOUNT_HB_GLOBAL ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_barrier :
if ( match_int ( & args [ 0 ] , & option ) ) {
status = 0 ;
goto bail ;
}
if ( option )
2007-09-07 20:16:10 +04:00
mopt - > mount_opt | = OCFS2_MOUNT_BARRIER ;
2005-12-16 01:31:24 +03:00
else
2007-09-07 20:16:10 +04:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_BARRIER ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_intr :
2007-09-07 20:16:10 +04:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_NOINTR ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_nointr :
2007-09-07 20:16:10 +04:00
mopt - > mount_opt | = OCFS2_MOUNT_NOINTR ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_err_panic :
2015-09-05 01:44:11 +03:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_ERRORS_CONT ;
mopt - > mount_opt & = ~ OCFS2_MOUNT_ERRORS_ROFS ;
2007-09-07 20:16:10 +04:00
mopt - > mount_opt | = OCFS2_MOUNT_ERRORS_PANIC ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_err_ro :
2015-09-05 01:44:11 +03:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_ERRORS_CONT ;
2007-09-07 20:16:10 +04:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_ERRORS_PANIC ;
2015-09-05 01:44:11 +03:00
mopt - > mount_opt | = OCFS2_MOUNT_ERRORS_ROFS ;
break ;
case Opt_err_cont :
mopt - > mount_opt & = ~ OCFS2_MOUNT_ERRORS_ROFS ;
mopt - > mount_opt & = ~ OCFS2_MOUNT_ERRORS_PANIC ;
mopt - > mount_opt | = OCFS2_MOUNT_ERRORS_CONT ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_data_ordered :
2007-09-07 20:16:10 +04:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_DATA_WRITEBACK ;
2005-12-16 01:31:24 +03:00
break ;
case Opt_data_writeback :
2007-09-07 20:16:10 +04:00
mopt - > mount_opt | = OCFS2_MOUNT_DATA_WRITEBACK ;
2005-12-16 01:31:24 +03:00
break ;
2008-08-18 13:11:00 +04:00
case Opt_user_xattr :
mopt - > mount_opt & = ~ OCFS2_MOUNT_NOUSERXATTR ;
break ;
case Opt_nouser_xattr :
mopt - > mount_opt | = OCFS2_MOUNT_NOUSERXATTR ;
break ;
2006-11-15 10:48:42 +03:00
case Opt_atime_quantum :
if ( match_int ( & args [ 0 ] , & option ) ) {
status = 0 ;
goto bail ;
}
if ( option > = 0 )
2007-09-07 20:16:10 +04:00
mopt - > atime_quantum = option ;
2006-11-15 10:48:42 +03:00
break ;
2007-06-19 04:00:24 +04:00
case Opt_slot :
if ( match_int ( & args [ 0 ] , & option ) ) {
status = 0 ;
goto bail ;
}
if ( option )
2007-09-07 20:16:10 +04:00
mopt - > slot = ( s16 ) option ;
2007-06-19 04:00:24 +04:00
break ;
2007-11-08 01:40:36 +03:00
case Opt_commit :
if ( match_int ( & args [ 0 ] , & option ) ) {
status = 0 ;
goto bail ;
}
if ( option < 0 )
return 0 ;
if ( option = = 0 )
2008-09-04 07:03:41 +04:00
option = JBD2_DEFAULT_MAX_COMMIT_AGE ;
2007-11-08 01:40:36 +03:00
mopt - > commit_interval = HZ * option ;
break ;
2007-12-21 01:58:11 +03:00
case Opt_localalloc :
if ( match_int ( & args [ 0 ] , & option ) ) {
status = 0 ;
goto bail ;
}
2010-04-06 05:17:13 +04:00
if ( option > = 0 )
2007-12-21 01:58:11 +03:00
mopt - > localalloc_opt = option ;
break ;
2007-12-21 03:49:04 +03:00
case Opt_localflocks :
/*
* Changing this during remount could race
* flock ( ) requests , or " unbalance " existing
* ones ( e . g . , a lock is taken in one mode but
* dropped in the other ) . If users care enough
* to flip locking modes during remount , we
* could add a " local " flag to individual
* flock structures for proper tracking of
* state .
*/
if ( ! is_remount )
mopt - > mount_opt | = OCFS2_MOUNT_LOCALFLOCKS ;
break ;
2008-02-02 02:08:23 +03:00
case Opt_stack :
/* Check both that the option we were passed
* is of the right length and that it is a proper
* string of the right length .
*/
if ( ( ( args [ 0 ] . to - args [ 0 ] . from ) ! =
OCFS2_STACK_LABEL_LEN ) | |
( strnlen ( args [ 0 ] . from ,
OCFS2_STACK_LABEL_LEN ) ! =
OCFS2_STACK_LABEL_LEN ) ) {
mlog ( ML_ERROR ,
" Invalid cluster_stack option \n " ) ;
status = 0 ;
goto bail ;
}
memcpy ( mopt - > cluster_stack , args [ 0 ] . from ,
OCFS2_STACK_LABEL_LEN ) ;
mopt - > cluster_stack [ OCFS2_STACK_LABEL_LEN ] = ' \0 ' ;
2011-01-31 22:31:04 +03:00
/*
* Open code the memcmp here as we don ' t have
* an osb to pass to
* ocfs2_userspace_stack ( ) .
*/
if ( memcmp ( mopt - > cluster_stack ,
OCFS2_CLASSIC_CLUSTER_STACK ,
OCFS2_STACK_LABEL_LEN ) )
user_stack = 1 ;
2008-02-02 02:08:23 +03:00
break ;
2008-09-04 07:03:40 +04:00
case Opt_inode64 :
mopt - > mount_opt | = OCFS2_MOUNT_INODE64 ;
break ;
2008-08-21 22:13:17 +04:00
case Opt_usrquota :
mopt - > mount_opt | = OCFS2_MOUNT_USRQUOTA ;
break ;
case Opt_grpquota :
mopt - > mount_opt | = OCFS2_MOUNT_GRPQUOTA ;
break ;
2010-10-11 12:46:39 +04:00
case Opt_coherency_buffered :
mopt - > mount_opt | = OCFS2_MOUNT_COHERENCY_BUFFERED ;
break ;
case Opt_coherency_full :
mopt - > mount_opt & = ~ OCFS2_MOUNT_COHERENCY_BUFFERED ;
break ;
2008-11-14 06:17:52 +03:00
case Opt_acl :
mopt - > mount_opt | = OCFS2_MOUNT_POSIX_ACL ;
2009-10-15 16:54:04 +04:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_NO_POSIX_ACL ;
2008-11-14 06:17:52 +03:00
break ;
case Opt_noacl :
2009-10-15 16:54:04 +04:00
mopt - > mount_opt | = OCFS2_MOUNT_NO_POSIX_ACL ;
2008-11-14 06:17:52 +03:00
mopt - > mount_opt & = ~ OCFS2_MOUNT_POSIX_ACL ;
break ;
2009-12-08 00:10:48 +03:00
case Opt_resv_level :
if ( is_remount )
break ;
if ( match_int ( & args [ 0 ] , & option ) ) {
status = 0 ;
goto bail ;
}
if ( option > = OCFS2_MIN_RESV_LEVEL & &
option < OCFS2_MAX_RESV_LEVEL )
mopt - > resv_level = option ;
break ;
2010-04-06 05:17:16 +04:00
case Opt_dir_resv_level :
if ( is_remount )
break ;
if ( match_int ( & args [ 0 ] , & option ) ) {
status = 0 ;
goto bail ;
}
if ( option > = OCFS2_MIN_RESV_LEVEL & &
option < OCFS2_MAX_RESV_LEVEL )
mopt - > dir_resv_level = option ;
break ;
2015-02-11 01:09:04 +03:00
case Opt_journal_async_commit :
mopt - > mount_opt | = OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT ;
break ;
2005-12-16 01:31:24 +03:00
default :
mlog ( ML_ERROR ,
" Unrecognized mount option \" %s \" "
" or missing value \n " , p ) ;
status = 0 ;
goto bail ;
}
}
2011-01-31 22:31:04 +03:00
if ( user_stack = = 0 ) {
/* Ensure only one heartbeat mode */
tmp = mopt - > mount_opt & ( OCFS2_MOUNT_HB_LOCAL |
OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE ) ;
if ( hweight32 ( tmp ) ! = 1 ) {
mlog ( ML_ERROR , " Invalid heartbeat mount options \n " ) ;
status = 0 ;
goto bail ;
}
2010-10-08 02:23:50 +04:00
}
2005-12-16 01:31:24 +03:00
status = 1 ;
bail :
return status ;
}
2011-12-09 06:32:45 +04:00
static int ocfs2_show_options ( struct seq_file * s , struct dentry * root )
2007-09-07 00:34:16 +04:00
{
2011-12-09 06:32:45 +04:00
struct ocfs2_super * osb = OCFS2_SB ( root - > d_sb ) ;
2007-09-07 00:34:16 +04:00
unsigned long opts = osb - > s_mount_opt ;
2008-07-29 01:55:20 +04:00
unsigned int local_alloc_megs ;
2007-09-07 00:34:16 +04:00
2010-10-08 02:23:50 +04:00
if ( opts & ( OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL ) ) {
seq_printf ( s , " ,_netdev " ) ;
if ( opts & OCFS2_MOUNT_HB_LOCAL )
seq_printf ( s , " ,%s " , OCFS2_HB_LOCAL ) ;
else
seq_printf ( s , " ,%s " , OCFS2_HB_GLOBAL ) ;
} else
seq_printf ( s , " ,%s " , OCFS2_HB_NONE ) ;
2007-09-07 00:34:16 +04:00
if ( opts & OCFS2_MOUNT_NOINTR )
seq_printf ( s , " ,nointr " ) ;
if ( opts & OCFS2_MOUNT_DATA_WRITEBACK )
seq_printf ( s , " ,data=writeback " ) ;
else
seq_printf ( s , " ,data=ordered " ) ;
if ( opts & OCFS2_MOUNT_BARRIER )
seq_printf ( s , " ,barrier=1 " ) ;
if ( opts & OCFS2_MOUNT_ERRORS_PANIC )
seq_printf ( s , " ,errors=panic " ) ;
2015-09-05 01:44:11 +03:00
else if ( opts & OCFS2_MOUNT_ERRORS_CONT )
seq_printf ( s , " ,errors=continue " ) ;
2007-09-07 00:34:16 +04:00
else
seq_printf ( s , " ,errors=remount-ro " ) ;
if ( osb - > preferred_slot ! = OCFS2_INVALID_SLOT )
seq_printf ( s , " ,preferred_slot=%d " , osb - > preferred_slot ) ;
2011-12-09 06:32:45 +04:00
seq_printf ( s , " ,atime_quantum=%u " , osb - > s_atime_quantum ) ;
2007-09-07 00:34:16 +04:00
2007-11-08 01:40:36 +03:00
if ( osb - > osb_commit_interval )
seq_printf ( s , " ,commit=%u " ,
( unsigned ) ( osb - > osb_commit_interval / HZ ) ) ;
2008-07-29 01:55:20 +04:00
local_alloc_megs = osb - > local_alloc_bits > > ( 20 - osb - > s_clustersize_bits ) ;
2010-04-06 05:17:14 +04:00
if ( local_alloc_megs ! = ocfs2_la_default_mb ( osb ) )
2008-07-29 01:55:20 +04:00
seq_printf ( s , " ,localalloc=%d " , local_alloc_megs ) ;
2007-12-21 01:58:11 +03:00
2007-12-21 03:49:04 +03:00
if ( opts & OCFS2_MOUNT_LOCALFLOCKS )
seq_printf ( s , " ,localflocks, " ) ;
2008-02-02 02:08:23 +03:00
if ( osb - > osb_cluster_stack [ 0 ] )
fs: create and use seq_show_option for escaping
Many file systems that implement the show_options hook fail to correctly
escape their output which could lead to unescaped characters (e.g. new
lines) leaking into /proc/mounts and /proc/[pid]/mountinfo files. This
could lead to confusion, spoofed entries (resulting in things like
systemd issuing false d-bus "mount" notifications), and who knows what
else. This looks like it would only be the root user stepping on
themselves, but it's possible weird things could happen in containers or
in other situations with delegated mount privileges.
Here's an example using overlay with setuid fusermount trusting the
contents of /proc/mounts (via the /etc/mtab symlink). Imagine the use
of "sudo" is something more sneaky:
$ BASE="ovl"
$ MNT="$BASE/mnt"
$ LOW="$BASE/lower"
$ UP="$BASE/upper"
$ WORK="$BASE/work/ 0 0
none /proc fuse.pwn user_id=1000"
$ mkdir -p "$LOW" "$UP" "$WORK"
$ sudo mount -t overlay -o "lowerdir=$LOW,upperdir=$UP,workdir=$WORK" none /mnt
$ cat /proc/mounts
none /root/ovl/mnt overlay rw,relatime,lowerdir=ovl/lower,upperdir=ovl/upper,workdir=ovl/work/ 0 0
none /proc fuse.pwn user_id=1000 0 0
$ fusermount -u /proc
$ cat /proc/mounts
cat: /proc/mounts: No such file or directory
This fixes the problem by adding new seq_show_option and
seq_show_option_n helpers, and updating the vulnerable show_option
handlers to use them as needed. Some, like SELinux, need to be open
coded due to unusual existing escape mechanisms.
[akpm@linux-foundation.org: add lost chunk, per Kees]
[keescook@chromium.org: seq_show_option should be using const parameters]
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Cc: J. R. Okajima <hooanon05g@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2015-09-05 01:44:57 +03:00
seq_show_option_n ( s , " cluster_stack " , osb - > osb_cluster_stack ,
OCFS2_STACK_LABEL_LEN ) ;
2008-08-21 22:13:17 +04:00
if ( opts & OCFS2_MOUNT_USRQUOTA )
seq_printf ( s , " ,usrquota " ) ;
if ( opts & OCFS2_MOUNT_GRPQUOTA )
seq_printf ( s , " ,grpquota " ) ;
2008-02-02 02:08:23 +03:00
2010-10-11 12:46:39 +04:00
if ( opts & OCFS2_MOUNT_COHERENCY_BUFFERED )
seq_printf ( s , " ,coherency=buffered " ) ;
else
seq_printf ( s , " ,coherency=full " ) ;
2008-09-05 22:29:14 +04:00
if ( opts & OCFS2_MOUNT_NOUSERXATTR )
seq_printf ( s , " ,nouser_xattr " ) ;
else
seq_printf ( s , " ,user_xattr " ) ;
2008-09-04 07:03:40 +04:00
if ( opts & OCFS2_MOUNT_INODE64 )
seq_printf ( s , " ,inode64 " ) ;
2008-11-14 06:17:52 +03:00
if ( opts & OCFS2_MOUNT_POSIX_ACL )
seq_printf ( s , " ,acl " ) ;
else
seq_printf ( s , " ,noacl " ) ;
2009-12-08 00:10:48 +03:00
if ( osb - > osb_resv_level ! = OCFS2_DEFAULT_RESV_LEVEL )
seq_printf ( s , " ,resv_level=%d " , osb - > osb_resv_level ) ;
2010-04-06 05:17:16 +04:00
if ( osb - > osb_dir_resv_level ! = osb - > osb_resv_level )
seq_printf ( s , " ,dir_resv_level=%d " , osb - > osb_resv_level ) ;
2015-02-11 01:09:04 +03:00
if ( opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT )
seq_printf ( s , " ,journal_async_commit " ) ;
2007-09-07 00:34:16 +04:00
return 0 ;
}
2005-12-16 01:31:24 +03:00
/*
 * Module entry point.
 *
 * Brings up, in order: the uptodate cache, the slab caches, the ocfs2
 * workqueue, the debugfs root, the locking protocol, the quota format
 * and finally the filesystem type.  A failure unwinds whatever was set
 * up before it, logs the error and returns it; success returns 0.
 */
static int __init ocfs2_init(void)
{
	int status;

	status = init_ocfs2_uptodate_cache();
	if (status < 0)
		goto fail;

	status = ocfs2_initialize_mem_caches();
	if (status < 0)
		goto undo_uptodate;

	ocfs2_wq = create_singlethread_workqueue(" ocfs2_wq ");
	if (!ocfs2_wq) {
		status = -ENOMEM;
		goto undo_mem_caches;
	}

	ocfs2_debugfs_root = debugfs_create_dir(" ocfs2 ", NULL);
	/*
	 * Deliberately a NULL-only test: the revert note recorded with this
	 * check reports that also rejecting error pointers hung boot when
	 * debugfs is disabled.
	 */
	if (!ocfs2_debugfs_root) {
		status = -ENOMEM;
		mlog(ML_ERROR, " Unable to create ocfs2 debugfs root. \n ");
		goto undo_wq;
	}

	ocfs2_set_locking_protocol();

	status = register_quota_format(&ocfs2_quota_format);
	if (status < 0)
		goto undo_wq;

	status = register_filesystem(&ocfs2_fs_type);
	if (status) {
		/* The quota format was registered just above; take it back. */
		unregister_quota_format(&ocfs2_quota_format);
		goto undo_wq;
	}

	return 0;

undo_wq:
	destroy_workqueue(ocfs2_wq);
	debugfs_remove(ocfs2_debugfs_root);
undo_mem_caches:
	ocfs2_free_mem_caches();
undo_uptodate:
	exit_ocfs2_uptodate_cache();
fail:
	mlog_errno(status);
	return status;
}
/*
 * Module exit: release everything ocfs2_init() set up.
 */
static void __exit ocfs2_exit(void)
{
	/* Let queued work finish before the workqueue goes away. */
	if (ocfs2_wq) {
		flush_workqueue(ocfs2_wq);
		destroy_workqueue(ocfs2_wq);
	}

	unregister_quota_format(&ocfs2_quota_format);

	debugfs_remove(ocfs2_debugfs_root);

	ocfs2_free_mem_caches();

	unregister_filesystem(&ocfs2_fs_type);

	exit_ocfs2_uptodate_cache();
}
/*
 * Super-block release hook: trace the call, sync the block device, then
 * dismount the volume (mnt_err = 0, i.e. not a failed-mount teardown).
 */
static void ocfs2_put_super(struct super_block *sb)
{
	trace_ocfs2_put_super(sb);

	ocfs2_sync_blockdev(sb);
	ocfs2_dismount_volume(sb, 0);
}
2006-06-23 13:02:58 +04:00
static int ocfs2_statfs ( struct dentry * dentry , struct kstatfs * buf )
2005-12-16 01:31:24 +03:00
{
struct ocfs2_super * osb ;
u32 numbits , freebits ;
int status ;
struct ocfs2_dinode * bm_lock ;
struct buffer_head * bh = NULL ;
struct inode * inode = NULL ;
2011-02-23 16:29:08 +03:00
trace_ocfs2_statfs ( dentry - > d_sb , buf ) ;
2005-12-16 01:31:24 +03:00
2006-06-23 13:02:58 +04:00
osb = OCFS2_SB ( dentry - > d_sb ) ;
2005-12-16 01:31:24 +03:00
inode = ocfs2_get_system_file_inode ( osb ,
GLOBAL_BITMAP_SYSTEM_INODE ,
OCFS2_INVALID_SLOT ) ;
if ( ! inode ) {
mlog ( ML_ERROR , " failed to get bitmap inode \n " ) ;
status = - EIO ;
goto bail ;
}
2007-10-19 02:30:42 +04:00
status = ocfs2_inode_lock ( inode , & bh , 0 ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
bm_lock = ( struct ocfs2_dinode * ) bh - > b_data ;
numbits = le32_to_cpu ( bm_lock - > id1 . bitmap1 . i_total ) ;
freebits = numbits - le32_to_cpu ( bm_lock - > id1 . bitmap1 . i_used ) ;
buf - > f_type = OCFS2_SUPER_MAGIC ;
2006-06-23 13:02:58 +04:00
buf - > f_bsize = dentry - > d_sb - > s_blocksize ;
2005-12-16 01:31:24 +03:00
buf - > f_namelen = OCFS2_MAX_FILENAME_LEN ;
buf - > f_blocks = ( ( sector_t ) numbits ) *
( osb - > s_clustersize > > osb - > sb - > s_blocksize_bits ) ;
buf - > f_bfree = ( ( sector_t ) freebits ) *
( osb - > s_clustersize > > osb - > sb - > s_blocksize_bits ) ;
buf - > f_bavail = buf - > f_bfree ;
buf - > f_files = numbits ;
buf - > f_ffree = freebits ;
2009-01-16 11:33:05 +03:00
buf - > f_fsid . val [ 0 ] = crc32_le ( 0 , osb - > uuid_str , OCFS2_VOL_UUID_LEN )
& 0xFFFFFFFFUL ;
buf - > f_fsid . val [ 1 ] = crc32_le ( 0 , osb - > uuid_str + OCFS2_VOL_UUID_LEN ,
OCFS2_VOL_UUID_LEN ) & 0xFFFFFFFFUL ;
2005-12-16 01:31:24 +03:00
brelse ( bh ) ;
2007-10-19 02:30:42 +04:00
ocfs2_inode_unlock ( inode , 0 ) ;
2005-12-16 01:31:24 +03:00
status = 0 ;
bail :
2016-01-15 02:17:27 +03:00
iput ( inode ) ;
2005-12-16 01:31:24 +03:00
2011-03-07 11:43:21 +03:00
if ( status )
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
return status ;
}
2008-07-26 06:45:34 +04:00
static void ocfs2_inode_init_once ( void * data )
2005-12-16 01:31:24 +03:00
{
struct ocfs2_inode_info * oi = data ;
2007-05-17 09:10:57 +04:00
oi - > ip_flags = 0 ;
oi - > ip_open_count = 0 ;
spin_lock_init ( & oi - > ip_lock ) ;
ocfs2_extent_map_init ( & oi - > vfs_inode ) ;
INIT_LIST_HEAD ( & oi - > ip_io_markers ) ;
oi - > ip_dir_start_lookup = 0 ;
2014-04-04 01:46:46 +04:00
mutex_init ( & oi - > ip_unaligned_aio ) ;
2007-05-17 09:10:57 +04:00
init_rwsem ( & oi - > ip_alloc_sem ) ;
2008-08-18 13:11:00 +04:00
init_rwsem ( & oi - > ip_xattr_sem ) ;
2007-05-17 09:10:57 +04:00
mutex_init ( & oi - > ip_io_mutex ) ;
2005-12-16 01:31:24 +03:00
2007-05-17 09:10:57 +04:00
oi - > ip_blkno = 0ULL ;
oi - > ip_clusters = 0 ;
2005-12-16 01:31:24 +03:00
2009-12-08 00:15:40 +03:00
ocfs2_resv_init_once ( & oi - > ip_la_data_resv ) ;
2007-05-17 09:10:57 +04:00
ocfs2_lock_res_init_once ( & oi - > ip_rw_lockres ) ;
2007-10-19 02:30:42 +04:00
ocfs2_lock_res_init_once ( & oi - > ip_inode_lockres ) ;
2007-05-17 09:10:57 +04:00
ocfs2_lock_res_init_once ( & oi - > ip_open_lockres ) ;
2005-12-16 01:31:24 +03:00
2009-02-11 07:00:41 +03:00
ocfs2_metadata_cache_init ( INODE_CACHE ( & oi - > vfs_inode ) ,
2009-02-11 06:00:37 +03:00
& ocfs2_inode_caching_ops ) ;
2005-12-16 01:31:24 +03:00
2007-05-17 09:10:57 +04:00
inode_init_once ( & oi - > vfs_inode ) ;
2005-12-16 01:31:24 +03:00
}
static int ocfs2_initialize_mem_caches ( void )
{
ocfs2_inode_cachep = kmem_cache_create ( " ocfs2_inode_cache " ,
2006-03-24 14:16:06 +03:00
sizeof ( struct ocfs2_inode_info ) ,
0 ,
( SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
2016-01-15 02:18:21 +03:00
SLAB_MEM_SPREAD | SLAB_ACCOUNT ) ,
2007-07-20 05:11:58 +04:00
ocfs2_inode_init_once ) ;
2008-08-25 21:56:50 +04:00
ocfs2_dquot_cachep = kmem_cache_create ( " ocfs2_dquot_cache " ,
sizeof ( struct ocfs2_dquot ) ,
0 ,
( SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD ) ,
NULL ) ;
ocfs2_qf_chunk_cachep = kmem_cache_create ( " ocfs2_qf_chunk_cache " ,
sizeof ( struct ocfs2_quota_chunk ) ,
0 ,
( SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD ) ,
NULL ) ;
if ( ! ocfs2_inode_cachep | | ! ocfs2_dquot_cachep | |
! ocfs2_qf_chunk_cachep ) {
if ( ocfs2_inode_cachep )
kmem_cache_destroy ( ocfs2_inode_cachep ) ;
if ( ocfs2_dquot_cachep )
kmem_cache_destroy ( ocfs2_dquot_cachep ) ;
if ( ocfs2_qf_chunk_cachep )
kmem_cache_destroy ( ocfs2_qf_chunk_cachep ) ;
2005-12-16 01:31:24 +03:00
return - ENOMEM ;
2008-08-25 21:56:50 +04:00
}
2005-12-16 01:31:24 +03:00
return 0 ;
}
/*
 * Destroy the slab caches created by ocfs2_initialize_mem_caches() and
 * reset the global cache pointers to NULL.
 */
static void ocfs2_free_mem_caches(void)
{
	/*
	 * Inodes freed through RCU may still be pending; wait for those
	 * callbacks before the cache they belong to is destroyed.
	 */
	rcu_barrier();

	if (ocfs2_inode_cachep)
		kmem_cache_destroy(ocfs2_inode_cachep);
	ocfs2_inode_cachep = NULL;

	if (ocfs2_dquot_cachep)
		kmem_cache_destroy(ocfs2_dquot_cachep);
	ocfs2_dquot_cachep = NULL;

	if (ocfs2_qf_chunk_cachep)
		kmem_cache_destroy(ocfs2_qf_chunk_cachep);
	ocfs2_qf_chunk_cachep = NULL;
}
static int ocfs2_get_sector ( struct super_block * sb ,
struct buffer_head * * bh ,
int block ,
int sect_size )
{
if ( ! sb_set_blocksize ( sb , sect_size ) ) {
mlog ( ML_ERROR , " unable to set blocksize \n " ) ;
return - EIO ;
}
* bh = sb_getblk ( sb , block ) ;
if ( ! * bh ) {
2013-11-13 03:06:54 +04:00
mlog_errno ( - ENOMEM ) ;
return - ENOMEM ;
2005-12-16 01:31:24 +03:00
}
lock_buffer ( * bh ) ;
if ( ! buffer_dirty ( * bh ) )
clear_buffer_uptodate ( * bh ) ;
unlock_buffer ( * bh ) ;
ll_rw_block ( READ , 1 , bh ) ;
wait_on_buffer ( * bh ) ;
2009-02-13 05:11:47 +03:00
if ( ! buffer_uptodate ( * bh ) ) {
mlog_errno ( - EIO ) ;
brelse ( * bh ) ;
* bh = NULL ;
return - EIO ;
}
2005-12-16 01:31:24 +03:00
return 0 ;
}
/*
 * ocfs2_mount_volume() - cluster-side part of mounting a volume.
 * @sb: super block of the volume being mounted
 *
 * A hard read-only volume skips all of this and reports success.
 * Otherwise the DLM is initialised and the super lock taken; under that
 * lock a slot is found, the node-local system inodes are loaded, the
 * volume is checked and the truncate log is initialised.  The super lock
 * is dropped again on every path that acquired it.
 *
 * Returns 0 on success or the negative error of the step that failed.
 */
static int ocfs2_mount_volume(struct super_block *sb)
{
	struct ocfs2_super *osb = OCFS2_SB(sb);
	int status;

	if (ocfs2_is_hard_readonly(osb))
		return 0;

	/* Failures up to and including the lock attempt have nothing to undo. */
	status = ocfs2_dlm_init(osb);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	status = ocfs2_super_lock(osb, 1);
	if (status < 0) {
		mlog_errno(status);
		return status;
	}

	/* This will load up the node map and add ourselves to it. */
	status = ocfs2_find_slot(osb);
	if (status < 0) {
		mlog_errno(status);
		goto unlock;
	}

	/* load all node-local system inodes */
	status = ocfs2_init_local_system_inodes(osb);
	if (status < 0) {
		mlog_errno(status);
		goto unlock;
	}

	status = ocfs2_check_volume(osb);
	if (status < 0) {
		mlog_errno(status);
		goto unlock;
	}

	status = ocfs2_truncate_log_init(osb);
	if (status < 0)
		mlog_errno(status);

unlock:
	ocfs2_super_unlock(osb, 1);
	return status;
}
/*
 * ocfs2_dismount_volume ( ) - tear down everything attached to a mounted
 * ( or partially mounted ) volume and free its ocfs2_super .
 *
 * sb : super block being dismounted ; must be non - NULL and must carry
 * an ocfs2_super ( both are enforced with BUG_ON ) .
 * mnt_err : non - zero when called because the mount itself failed ; in that
 * case no heartbeat hangup is requested from here .
 *
 * The steps below run in a fixed order : per - volume services are stopped
 * first , then the slot is released under the super lock ( only when a
 * cluster connection exists ) , then the DLM is shut down , and finally the
 * osb is deleted and freed and the super block fields are cleared .
 */
static void ocfs2_dismount_volume ( struct super_block * sb , int mnt_err )
{
2008-02-02 02:03:57 +03:00
int tmp , hangup_needed = 0 ;
2005-12-16 01:31:24 +03:00
struct ocfs2_super * osb = NULL ;
2013-09-25 02:27:32 +04:00
char nodestr [ 12 ] ;
2005-12-16 01:31:24 +03:00
2011-02-23 16:29:08 +03:00
trace_ocfs2_dismount_volume ( sb ) ;
2005-12-16 01:31:24 +03:00
BUG_ON ( ! sb ) ;
osb = OCFS2_SB ( sb ) ;
BUG_ON ( ! osb ) ;
2008-12-18 01:17:43 +03:00
debugfs_remove ( osb - > osb_ctxt ) ;
2009-06-20 03:53:17 +04:00
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop ( osb ) ;
2008-08-21 22:13:17 +04:00
ocfs2_disable_quotas ( osb ) ;
2014-04-04 01:46:56 +04:00
/* All dquots should be freed by now */
WARN_ON ( ! llist_empty ( & osb - > dquot_drop_list ) ) ;
/* Wait for worker to be done with the work structure in osb */
cancel_work_sync ( & osb - > dquot_drop_work ) ;
2005-12-16 01:31:24 +03:00
ocfs2_shutdown_local_alloc ( osb ) ;
2014-06-24 00:22:08 +04:00
ocfs2_truncate_log_shutdown ( osb ) ;
2008-02-01 23:03:57 +03:00
/* This will disable recovery and flush any recovery work. */
ocfs2_recovery_exit ( osb ) ;
2005-12-16 01:31:24 +03:00
ocfs2_journal_shutdown ( osb ) ;
ocfs2_sync_blockdev ( sb ) ;
2009-08-24 07:13:37 +04:00
ocfs2_purge_refcount_trees ( osb ) ;
2008-02-02 01:39:35 +03:00
/* No cluster connection means we've failed during mount, so skip
* all the steps which depended on that to complete . */
if ( osb - > cconn ) {
2005-12-16 01:31:24 +03:00
tmp = ocfs2_super_lock ( osb , 1 ) ;
if ( tmp < 0 ) {
mlog_errno ( tmp ) ;
/*
 * NOTE(review): this return skips the rest of the teardown below ,
 * including ocfs2_delete_osb ( ) / kfree ( osb ) and the reset of
 * sb - > s_fs_info - confirm that this is intended .
 */
return ;
}
2007-10-05 01:47:09 +04:00
}
2005-12-16 01:31:24 +03:00
2007-10-05 01:47:09 +04:00
/* Give the slot back only if one was actually taken . */
if ( osb - > slot_num ! = OCFS2_INVALID_SLOT )
ocfs2_put_slot ( osb ) ;
2005-12-16 01:31:24 +03:00
2008-02-02 01:39:35 +03:00
/* Pairs with the ocfs2_super_lock ( ) taken above under the same test . */
if ( osb - > cconn )
2005-12-16 01:31:24 +03:00
ocfs2_super_unlock ( osb , 1 ) ;
ocfs2_release_system_inodes ( osb ) ;
2008-01-30 03:59:56 +03:00
/*
* If we ' re dismounting due to mount error , mount . ocfs2 will clean
* up heartbeat . If we ' re a local mount , there is no heartbeat .
* If we failed before we got a uuid_str yet , we can ' t stop
* heartbeat . Otherwise , do it .
*/
2011-05-27 20:34:19 +04:00
if ( ! mnt_err & & ! ocfs2_mount_local ( osb ) & & osb - > uuid_str & &
! ocfs2_is_hard_readonly ( osb ) )
2008-02-02 02:03:57 +03:00
hangup_needed = 1 ;
if ( osb - > cconn )
ocfs2_dlm_shutdown ( osb , hangup_needed ) ;
2009-01-07 01:57:08 +03:00
ocfs2_blockcheck_stats_debugfs_remove ( & osb - > osb_ecc_stats ) ;
2008-02-02 02:03:57 +03:00
debugfs_remove ( osb - > osb_debug_root ) ;
/* The hangup itself is issued only after the DLM shutdown call above . */
if ( hangup_needed )
2008-01-30 03:59:56 +03:00
ocfs2_cluster_hangup ( osb - > uuid_str , strlen ( osb - > uuid_str ) ) ;
2005-12-16 01:31:24 +03:00
atomic_set ( & osb - > vol_state , VOLUME_DISMOUNTED ) ;
2006-12-06 04:56:35 +03:00
/* Build the node part of the log line : " local " or the node number . */
if ( ocfs2_mount_local ( osb ) )
snprintf ( nodestr , sizeof ( nodestr ) , " local " ) ;
else
2008-01-31 02:38:24 +03:00
snprintf ( nodestr , sizeof ( nodestr ) , " %u " , osb - > node_num ) ;
2006-12-06 04:56:35 +03:00
printk ( KERN_INFO " ocfs2: Unmounting device (%s) on (node %s) \n " ,
osb - > dev_str , nodestr ) ;
2005-12-16 01:31:24 +03:00
/* osb is gone after this point ; detach it from the super block . */
ocfs2_delete_osb ( osb ) ;
kfree ( osb ) ;
sb - > s_dev = 0 ;
sb - > s_fs_info = NULL ;
}
static int ocfs2_setup_osb_uuid ( struct ocfs2_super * osb , const unsigned char * uuid ,
unsigned uuid_bytes )
{
int i , ret ;
char * ptr ;
BUG_ON ( uuid_bytes ! = OCFS2_VOL_UUID_LEN ) ;
2006-12-13 11:34:52 +03:00
osb - > uuid_str = kzalloc ( OCFS2_VOL_UUID_LEN * 2 + 1 , GFP_KERNEL ) ;
2005-12-16 01:31:24 +03:00
if ( osb - > uuid_str = = NULL )
return - ENOMEM ;
for ( i = 0 , ptr = osb - > uuid_str ; i < OCFS2_VOL_UUID_LEN ; i + + ) {
/* print with null */
ret = snprintf ( ptr , 3 , " %02X " , uuid [ i ] ) ;
if ( ret ! = 2 ) /* drop super cleans up */
return - EINVAL ;
/* then only advance past the last char */
ptr + = 2 ;
}
return 0 ;
}
2010-07-23 02:05:57 +04:00
/* Make sure entire volume is addressable by our journal. Requires
osb_clusters_at_boot to be valid and for the journal to have been
initialized by ocfs2_journal_init ( ) . */
static int ocfs2_journal_addressable ( struct ocfs2_super * osb )
{
int status = 0 ;
u64 max_block =
ocfs2_clusters_to_blocks ( osb - > sb ,
osb - > osb_clusters_at_boot ) - 1 ;
/* 32-bit block number is always OK. */
if ( max_block < = ( u32 ) ~ 0ULL )
goto out ;
/* Volume is "huge", so see if our journal is new enough to
support it . */
if ( ! ( OCFS2_HAS_COMPAT_FEATURE ( osb - > sb ,
OCFS2_FEATURE_COMPAT_JBD2_SB ) & &
jbd2_journal_check_used_features ( osb - > journal - > j_journal , 0 , 0 ,
JBD2_FEATURE_INCOMPAT_64BIT ) ) ) {
mlog ( ML_ERROR , " The journal cannot address the entire volume. "
" Enable the 'block64' journal option with tunefs.ocfs2 " ) ;
status = - EFBIG ;
goto out ;
}
out :
return status ;
}
2005-12-16 01:31:24 +03:00
/*
 * ocfs2_initialize_super ( ) - allocate and populate the in - memory
 * ocfs2_super for a volume from its on - disk super block .
 *
 * sb : VFS super block ; receives the new osb in s_fs_info plus the
 * ocfs2 operation tables .
 * bh : buffer whose data is the super block dinode .
 * sector_size : sector size ; stored as s_sectsize_bits via blksize_bits ( ) .
 * stats : blockcheck statistics from the super block probe , copied
 * into the osb .
 *
 * Returns 0 on success or a negative errno ( - ENOMEM , - EINVAL , - EFBIG ,
 * or whatever a failing helper returned ) .
 *
 * NOTE(review): no failure path in this function frees what was already
 * allocated ; the osb stays attached to sb - > s_fs_info . Presumably the
 * caller tears it down - confirm .
 */
static int ocfs2_initialize_super ( struct super_block * sb ,
struct buffer_head * bh ,
2009-01-07 01:57:08 +03:00
int sector_size ,
struct ocfs2_blockcheck_stats * stats )
2005-12-16 01:31:24 +03:00
{
2007-09-26 22:10:04 +04:00
int status ;
2007-07-20 23:56:16 +04:00
int i , cbits , bbits ;
struct ocfs2_dinode * di = ( struct ocfs2_dinode * ) bh - > b_data ;
2005-12-16 01:31:24 +03:00
struct inode * inode = NULL ;
struct ocfs2_journal * journal ;
struct ocfs2_super * osb ;
2010-07-23 02:05:57 +04:00
u64 total_blocks ;
2005-12-16 01:31:24 +03:00
2006-12-13 11:34:52 +03:00
osb = kzalloc ( sizeof ( struct ocfs2_super ) , GFP_KERNEL ) ;
2005-12-16 01:31:24 +03:00
if ( ! osb ) {
status = - ENOMEM ;
mlog_errno ( status ) ;
goto bail ;
}
/* Attach the osb and install the ocfs2 operation tables on the sb . */
sb - > s_fs_info = osb ;
sb - > s_op = & ocfs2_sops ;
2010-12-18 20:10:00 +03:00
sb - > s_d_op = & ocfs2_dentry_ops ;
2005-12-16 01:31:24 +03:00
sb - > s_export_op = & ocfs2_export_ops ;
2014-10-08 20:30:19 +04:00
sb - > s_qcop = & dquot_quotactl_sysfile_ops ;
2008-08-21 22:13:17 +04:00
sb - > dq_op = & ocfs2_quota_operations ;
2014-09-29 17:02:51 +04:00
sb - > s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP ;
2008-08-18 13:11:00 +04:00
sb - > s_xattr = ocfs2_xattr_handlers ;
2007-08-10 03:52:30 +04:00
sb - > s_time_gran = 1 ;
2005-12-16 01:31:24 +03:00
sb - > s_flags | = MS_NOATIME ;
/* this is needed to support O_LARGEFILE */
2007-07-20 23:56:16 +04:00
cbits = le32_to_cpu ( di - > id2 . i_super . s_clustersize_bits ) ;
bbits = le32_to_cpu ( di - > id2 . i_super . s_blocksize_bits ) ;
sb - > s_maxbytes = ocfs2_max_file_offset ( bbits , cbits ) ;
2015-04-15 01:46:39 +03:00
memcpy ( sb - > s_uuid , di - > id2 . i_super . s_uuid ,
sizeof ( di - > id2 . i_super . s_uuid ) ) ;
2005-12-16 01:31:24 +03:00
2008-11-13 03:27:44 +03:00
/*
 * NOTE(review): cbits and bbits are used here as read from disk ; the
 * cluster size range check only happens further down in this function .
 * Confirm that cbits > = bbits is guaranteed before this shift .
 */
osb - > osb_dx_mask = ( 1 < < ( cbits - bbits ) ) - 1 ;
/* Seed words 0 - 2 come from s_dx_seed , word 3 is the uuid hash . */
for ( i = 0 ; i < 3 ; i + + )
osb - > osb_dx_seed [ i ] = le32_to_cpu ( di - > id2 . i_super . s_dx_seed [ i ] ) ;
osb - > osb_dx_seed [ 3 ] = le32_to_cpu ( di - > id2 . i_super . s_uuid_hash ) ;
2005-12-16 01:31:24 +03:00
osb - > sb = sb ;
/* Save off for ocfs2_rw_direct */
osb - > s_sectsize_bits = blksize_bits ( sector_size ) ;
2006-01-27 12:32:52 +03:00
BUG_ON ( ! osb - > s_sectsize_bits ) ;
2005-12-16 01:31:24 +03:00
2007-09-25 02:56:19 +04:00
/* Locks , wait queues , lists and counters start out idle / empty . */
spin_lock_init ( & osb - > dc_task_lock ) ;
init_waitqueue_head ( & osb - > dc_event ) ;
osb - > dc_work_sequence = 0 ;
osb - > dc_wake_sequence = 0 ;
2005-12-16 01:31:24 +03:00
INIT_LIST_HEAD ( & osb - > blocked_lock_list ) ;
osb - > blocked_lock_count = 0 ;
spin_lock_init ( & osb - > osb_lock ) ;
2009-02-25 04:40:26 +03:00
spin_lock_init ( & osb - > osb_xattr_lock ) ;
2010-01-25 09:11:06 +03:00
ocfs2_init_steal_slots ( osb ) ;
2005-12-16 01:31:24 +03:00
2014-04-04 01:47:13 +04:00
mutex_init ( & osb - > system_file_mutex ) ;
2005-12-16 01:31:24 +03:00
atomic_set ( & osb - > alloc_stats . moves , 0 ) ;
atomic_set ( & osb - > alloc_stats . local_data , 0 ) ;
atomic_set ( & osb - > alloc_stats . bitmap_data , 0 ) ;
atomic_set ( & osb - > alloc_stats . bg_allocs , 0 ) ;
atomic_set ( & osb - > alloc_stats . bg_extends , 0 ) ;
2009-01-07 01:57:08 +03:00
/* Copy the blockcheck stats from the superblock probe */
osb - > osb_ecc_stats = * stats ;
2005-12-16 01:31:24 +03:00
ocfs2_init_node_maps ( osb ) ;
/* dev_str is the device ' s major and minor number , comma separated . */
snprintf ( osb - > dev_str , sizeof ( osb - > dev_str ) , " %u,%u " ,
MAJOR ( osb - > sb - > s_dev ) , MINOR ( osb - > sb - > s_dev ) ) ;
2010-10-11 21:57:09 +04:00
/* Reject a slot count of zero or above OCFS2_MAX_SLOTS . */
osb - > max_slots = le16_to_cpu ( di - > id2 . i_super . s_max_slots ) ;
if ( osb - > max_slots > OCFS2_MAX_SLOTS | | osb - > max_slots = = 0 ) {
mlog ( ML_ERROR , " Invalid number of node slots (%u) \n " ,
osb - > max_slots ) ;
status = - EINVAL ;
goto bail ;
}
2009-07-08 01:22:12 +04:00
ocfs2_orphan_scan_init ( osb ) ;
2008-02-01 23:03:57 +03:00
status = ocfs2_recovery_init ( osb ) ;
if ( status ) {
mlog ( ML_ERROR , " Unable to initialize recovery state \n " ) ;
mlog_errno ( status ) ;
goto bail ;
}
2005-12-16 01:31:24 +03:00
init_waitqueue_head ( & osb - > checkpoint_event ) ;
2006-11-15 10:48:42 +03:00
osb - > s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM ;
2005-12-16 01:31:24 +03:00
osb - > slot_num = OCFS2_INVALID_SLOT ;
2008-08-18 13:11:46 +04:00
osb - > s_xattr_inline_size = le16_to_cpu (
di - > id2 . i_super . s_xattr_inline_size ) ;
2008-08-18 13:08:55 +04:00
2005-12-16 01:31:24 +03:00
osb - > local_alloc_state = OCFS2_LA_UNUSED ;
osb - > local_alloc_bh = NULL ;
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-29 05:02:53 +04:00
INIT_DELAYED_WORK ( & osb - > la_enable_wq , ocfs2_la_enable_worker ) ;
2005-12-16 01:31:24 +03:00
init_waitqueue_head ( & osb - > osb_mount_event ) ;
2009-12-08 00:10:48 +03:00
status = ocfs2_resmap_init ( osb , & osb - > osb_la_resmap ) ;
if ( status ) {
mlog_errno ( status ) ;
goto bail ;
}
2005-12-16 01:31:24 +03:00
osb - > vol_label = kmalloc ( OCFS2_MAX_VOL_LABEL_LEN , GFP_KERNEL ) ;
if ( ! osb - > vol_label ) {
mlog ( ML_ERROR , " unable to alloc vol label \n " ) ;
status = - ENOMEM ;
goto bail ;
}
2008-07-15 04:31:10 +04:00
/* Per - slot arrays , one zeroed entry for each of max_slots . */
osb - > slot_recovery_generations =
kcalloc ( osb - > max_slots , sizeof ( * osb - > slot_recovery_generations ) ,
GFP_KERNEL ) ;
if ( ! osb - > slot_recovery_generations ) {
status = - ENOMEM ;
mlog_errno ( status ) ;
goto bail ;
}
2006-02-23 04:35:08 +03:00
init_waitqueue_head ( & osb - > osb_wipe_event ) ;
osb - > osb_orphan_wipes = kcalloc ( osb - > max_slots ,
sizeof ( * osb - > osb_orphan_wipes ) ,
GFP_KERNEL ) ;
if ( ! osb - > osb_orphan_wipes ) {
status = - ENOMEM ;
mlog_errno ( status ) ;
goto bail ;
}
2009-08-24 07:13:37 +04:00
osb - > osb_rf_lock_tree = RB_ROOT ;
2005-12-16 01:31:24 +03:00
/* Cache the three feature words in CPU byte order . */
osb - > s_feature_compat =
le32_to_cpu ( OCFS2_RAW_SB ( di ) - > s_feature_compat ) ;
osb - > s_feature_ro_compat =
le32_to_cpu ( OCFS2_RAW_SB ( di ) - > s_feature_ro_compat ) ;
osb - > s_feature_incompat =
le32_to_cpu ( OCFS2_RAW_SB ( di ) - > s_feature_incompat ) ;
/*
 * The assignments inside the next two conditions are intentional : i
 * keeps the unsupported feature bits so the message can print them .
 */
if ( ( i = OCFS2_HAS_INCOMPAT_FEATURE ( osb - > sb , ~ OCFS2_FEATURE_INCOMPAT_SUPP ) ) ) {
mlog ( ML_ERROR , " couldn't mount because of unsupported "
" optional features (%x). \n " , i ) ;
status = - EINVAL ;
goto bail ;
}
/* Unsupported ro - compat features only block a read - write mount . */
if ( ! ( osb - > sb - > s_flags & MS_RDONLY ) & &
( i = OCFS2_HAS_RO_COMPAT_FEATURE ( osb - > sb , ~ OCFS2_FEATURE_RO_COMPAT_SUPP ) ) ) {
mlog ( ML_ERROR , " couldn't mount RDWR because of "
" unsupported optional features (%x). \n " , i ) ;
status = - EINVAL ;
goto bail ;
}
2010-10-09 21:24:46 +04:00
if ( ocfs2_clusterinfo_valid ( osb ) ) {
osb - > osb_stackflags =
OCFS2_RAW_SB ( di ) - > s_cluster_info . ci_stackflags ;
2014-01-22 03:48:21 +04:00
/* The size argument is the label length plus one for the NUL . */
strlcpy ( osb - > osb_cluster_stack ,
2008-02-02 02:08:23 +03:00
OCFS2_RAW_SB ( di ) - > s_cluster_info . ci_stack ,
2014-01-22 03:48:21 +04:00
OCFS2_STACK_LABEL_LEN + 1 ) ;
2008-02-02 02:08:23 +03:00
/* A stack label must be exactly OCFS2_STACK_LABEL_LEN characters . */
if ( strlen ( osb - > osb_cluster_stack ) ! = OCFS2_STACK_LABEL_LEN ) {
mlog ( ML_ERROR ,
" couldn't mount because of an invalid "
" cluster stack label (%s) \n " ,
osb - > osb_cluster_stack ) ;
status = - EINVAL ;
goto bail ;
}
2014-01-22 03:48:21 +04:00
strlcpy ( osb - > osb_cluster_name ,
OCFS2_RAW_SB ( di ) - > s_cluster_info . ci_cluster ,
OCFS2_CLUSTER_NAME_LEN + 1 ) ;
2008-02-02 02:08:23 +03:00
} else {
/* The empty string is identical with classic tools that
* don ' t know about s_cluster_info . */
osb - > osb_cluster_stack [ 0 ] = ' \0 ' ;
}
2005-12-16 01:31:24 +03:00
get_random_bytes ( & osb - > s_next_generation , sizeof ( u32 ) ) ;
/* FIXME
* This should be done in ocfs2_journal_init ( ) , but unknown
* ordering issues will cause the filesystem to crash .
* If anyone wants to figure out what part of the code
* refers to osb - > journal before ocfs2_journal_init ( ) is run ,
* be my guest .
*/
/* initialize our journal structure */
2006-12-13 11:34:52 +03:00
journal = kzalloc ( sizeof ( struct ocfs2_journal ) , GFP_KERNEL ) ;
2005-12-16 01:31:24 +03:00
if ( ! journal ) {
mlog ( ML_ERROR , " unable to alloc journal \n " ) ;
status = - ENOMEM ;
goto bail ;
}
osb - > journal = journal ;
journal - > j_osb = osb ;
atomic_set ( & journal - > j_num_trans , 0 ) ;
init_rwsem ( & journal - > j_trans_barrier ) ;
init_waitqueue_head ( & journal - > j_checkpointed ) ;
spin_lock_init ( & journal - > j_lock ) ;
journal - > j_trans_id = ( unsigned long ) 1 ;
INIT_LIST_HEAD ( & journal - > j_la_cleanups ) ;
2006-11-22 17:57:56 +03:00
INIT_WORK ( & journal - > j_recovery_work , ocfs2_complete_recovery ) ;
2005-12-16 01:31:24 +03:00
journal - > j_state = OCFS2_JOURNAL_FREE ;
2014-04-04 01:46:56 +04:00
INIT_WORK ( & osb - > dquot_drop_work , ocfs2_drop_dquot_refs ) ;
init_llist_head ( & osb - > dquot_drop_list ) ;
2005-12-16 01:31:24 +03:00
/* get some pseudo constants for clustersize bits */
osb - > s_clustersize_bits =
le32_to_cpu ( di - > id2 . i_super . s_clustersize_bits ) ;
osb - > s_clustersize = 1 < < osb - > s_clustersize_bits ;
if ( osb - > s_clustersize < OCFS2_MIN_CLUSTERSIZE | |
osb - > s_clustersize > OCFS2_MAX_CLUSTERSIZE ) {
mlog ( ML_ERROR , " Volume has invalid cluster size (%d) \n " ,
osb - > s_clustersize ) ;
status = - EINVAL ;
goto bail ;
}
2010-07-23 02:05:57 +04:00
/*
 * Refuse volumes whose block count fails generic_check_addressable ( ) ;
 * whatever non - zero value it returns is reported as - EFBIG .
 */
total_blocks = ocfs2_clusters_to_blocks ( osb - > sb ,
le32_to_cpu ( di - > i_clusters ) ) ;
status = generic_check_addressable ( osb - > sb - > s_blocksize_bits ,
total_blocks ) ;
if ( status ) {
mlog ( ML_ERROR , " Volume too large "
" to mount safely on this system " ) ;
status = - EFBIG ;
2005-12-16 01:31:24 +03:00
goto bail ;
}
/* Any failure of the uuid setup is reported as - ENOMEM . */
if ( ocfs2_setup_osb_uuid ( osb , di - > id2 . i_super . s_uuid ,
sizeof ( di - > id2 . i_super . s_uuid ) ) ) {
mlog ( ML_ERROR , " Out of memory trying to setup our uuid. \n " ) ;
status = - ENOMEM ;
goto bail ;
}
2014-06-05 03:06:07 +04:00
strlcpy ( osb - > vol_label , di - > id2 . i_super . s_label ,
OCFS2_MAX_VOL_LABEL_LEN ) ;
2005-12-16 01:31:24 +03:00
osb - > root_blkno = le64_to_cpu ( di - > id2 . i_super . s_root_blkno ) ;
osb - > system_dir_blkno = le64_to_cpu ( di - > id2 . i_super . s_system_dir_blkno ) ;
osb - > first_cluster_group_blkno =
le64_to_cpu ( di - > id2 . i_super . s_first_cluster_group ) ;
osb - > fs_generation = le32_to_cpu ( di - > i_fs_generation ) ;
2008-08-18 13:11:00 +04:00
osb - > uuid_hash = le32_to_cpu ( di - > id2 . i_super . s_uuid_hash ) ;
2011-02-23 16:29:08 +03:00
trace_ocfs2_initialize_super ( osb - > vol_label , osb - > uuid_str ,
( unsigned long long ) osb - > root_blkno ,
( unsigned long long ) osb - > system_dir_blkno ,
osb - > s_clustersize_bits ) ;
2005-12-16 01:31:24 +03:00
osb - > osb_dlm_debug = ocfs2_new_dlm_debug ( ) ;
if ( ! osb - > osb_dlm_debug ) {
status = - ENOMEM ;
mlog_errno ( status ) ;
goto bail ;
}
atomic_set ( & osb - > vol_state , VOLUME_INIT ) ;
/* load root, system_dir, and all global system inodes */
status = ocfs2_init_global_system_inodes ( osb ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
/*
* global bitmap
*/
inode = ocfs2_get_system_file_inode ( osb , GLOBAL_BITMAP_SYSTEM_INODE ,
OCFS2_INVALID_SLOT ) ;
if ( ! inode ) {
status = - EINVAL ;
mlog_errno ( status ) ;
goto bail ;
}
/* Only two values are taken from the bitmap inode ; then it is put . */
osb - > bitmap_blkno = OCFS2_I ( inode ) - > ip_blkno ;
2010-04-06 05:17:14 +04:00
osb - > osb_clusters_at_boot = OCFS2_I ( inode ) - > ip_clusters ;
2005-12-16 01:31:24 +03:00
iput ( inode ) ;
2010-04-13 10:38:06 +04:00
/* Group bitmap size times 8 : a bit count , given a size in bytes . */
osb - > bitmap_cpg = ocfs2_group_bitmap_size ( sb , 0 ,
osb - > s_feature_incompat ) * 8 ;
2005-12-16 01:31:24 +03:00
status = ocfs2_init_slot_info ( osb ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
2015-04-15 01:46:42 +03:00
cleancache_init_shared_fs ( sb ) ;
2005-12-16 01:31:24 +03:00
bail :
return status ;
}
/*
* will return : - EAGAIN if it is ok to keep searching for superblocks
* - EINVAL if there is a bad superblock
* 0 on success
*/
static int ocfs2_verify_volume ( struct ocfs2_dinode * di ,
struct buffer_head * bh ,
2009-01-07 01:57:08 +03:00
u32 blksz ,
struct ocfs2_blockcheck_stats * stats )
2005-12-16 01:31:24 +03:00
{
int status = - EAGAIN ;
if ( memcmp ( di - > i_signature , OCFS2_SUPER_BLOCK_SIGNATURE ,
strlen ( OCFS2_SUPER_BLOCK_SIGNATURE ) ) = = 0 ) {
2008-12-12 02:04:14 +03:00
/* We have to do a raw check of the feature here */
if ( le32_to_cpu ( di - > id2 . i_super . s_feature_incompat ) &
OCFS2_FEATURE_INCOMPAT_META_ECC ) {
status = ocfs2_block_check_validate ( bh - > b_data ,
bh - > b_size ,
2009-01-07 01:57:08 +03:00
& di - > i_check ,
stats ) ;
2008-12-12 02:04:14 +03:00
if ( status )
goto out ;
}
2005-12-16 01:31:24 +03:00
status = - EINVAL ;
if ( ( 1 < < le32_to_cpu ( di - > id2 . i_super . s_blocksize_bits ) ) ! = blksz ) {
mlog ( ML_ERROR , " found superblock with incorrect block "
" size: found %u, should be %u \n " ,
1 < < le32_to_cpu ( di - > id2 . i_super . s_blocksize_bits ) ,
blksz ) ;
} else if ( le16_to_cpu ( di - > id2 . i_super . s_major_rev_level ) ! =
OCFS2_MAJOR_REV_LEVEL | |
le16_to_cpu ( di - > id2 . i_super . s_minor_rev_level ) ! =
OCFS2_MINOR_REV_LEVEL ) {
mlog ( ML_ERROR , " found superblock with bad version: "
" found %u.%u, should be %u.%u \n " ,
le16_to_cpu ( di - > id2 . i_super . s_major_rev_level ) ,
le16_to_cpu ( di - > id2 . i_super . s_minor_rev_level ) ,
OCFS2_MAJOR_REV_LEVEL ,
OCFS2_MINOR_REV_LEVEL ) ;
} else if ( bh - > b_blocknr ! = le64_to_cpu ( di - > i_blkno ) ) {
mlog ( ML_ERROR , " bad block number on superblock: "
2006-03-03 21:24:33 +03:00
" found %llu, should be %llu \n " ,
2007-04-28 03:01:25 +04:00
( unsigned long long ) le64_to_cpu ( di - > i_blkno ) ,
2006-03-03 21:24:33 +03:00
( unsigned long long ) bh - > b_blocknr ) ;
2005-12-16 01:31:24 +03:00
} else if ( le32_to_cpu ( di - > id2 . i_super . s_clustersize_bits ) < 12 | |
le32_to_cpu ( di - > id2 . i_super . s_clustersize_bits ) > 20 ) {
mlog ( ML_ERROR , " bad cluster size found: %u \n " ,
1 < < le32_to_cpu ( di - > id2 . i_super . s_clustersize_bits ) ) ;
} else if ( ! le64_to_cpu ( di - > id2 . i_super . s_root_blkno ) ) {
mlog ( ML_ERROR , " bad root_blkno: 0 \n " ) ;
} else if ( ! le64_to_cpu ( di - > id2 . i_super . s_system_dir_blkno ) ) {
mlog ( ML_ERROR , " bad system_dir_blkno: 0 \n " ) ;
} else if ( le16_to_cpu ( di - > id2 . i_super . s_max_slots ) > OCFS2_MAX_SLOTS ) {
mlog ( ML_ERROR ,
" Superblock slots found greater than file system "
" maximum: found %u, max %u \n " ,
le16_to_cpu ( di - > id2 . i_super . s_max_slots ) ,
OCFS2_MAX_SLOTS ) ;
} else {
/* found it! */
status = 0 ;
}
}
2008-12-12 02:04:14 +03:00
out :
2011-03-07 11:43:21 +03:00
if ( status & & status ! = - EAGAIN )
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
return status ;
}
static int ocfs2_check_volume ( struct ocfs2_super * osb )
{
2007-09-26 22:10:04 +04:00
int status ;
2005-12-16 01:31:24 +03:00
int dirty ;
2006-12-06 04:56:35 +03:00
int local ;
2005-12-16 01:31:24 +03:00
struct ocfs2_dinode * local_alloc = NULL ; /* only used if we
* recover
* ourselves . */
/* Init our journal object. */
status = ocfs2_journal_init ( osb - > journal , & dirty ) ;
if ( status < 0 ) {
mlog ( ML_ERROR , " Could not initialize journal! \n " ) ;
goto finally ;
}
2010-07-23 02:05:57 +04:00
/* Now that journal has been initialized, check to make sure
entire volume is addressable . */
status = ocfs2_journal_addressable ( osb ) ;
if ( status )
goto finally ;
2005-12-16 01:31:24 +03:00
/* If the journal was unmounted cleanly then we don't want to
* recover anything . Otherwise , journal_load will do that
* dirty work for us : ) */
if ( ! dirty ) {
status = ocfs2_journal_wipe ( osb - > journal , 0 ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto finally ;
}
} else {
2011-07-24 21:34:54 +04:00
printk ( KERN_NOTICE " ocfs2: File system on device (%s) was not "
" unmounted cleanly, recovering it. \n " , osb - > dev_str ) ;
2005-12-16 01:31:24 +03:00
}
2006-12-06 04:56:35 +03:00
local = ocfs2_mount_local ( osb ) ;
2005-12-16 01:31:24 +03:00
/* will play back anything left in the journal. */
2008-07-15 04:31:10 +04:00
status = ocfs2_journal_load ( osb - > journal , local , dirty ) ;
2008-06-10 10:24:48 +04:00
if ( status < 0 ) {
mlog ( ML_ERROR , " ocfs2 journal load failed! %d \n " , status ) ;
goto finally ;
}
2005-12-16 01:31:24 +03:00
2015-02-11 01:09:04 +03:00
if ( osb - > s_mount_opt & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT )
jbd2_journal_set_features ( osb - > journal - > j_journal ,
JBD2_FEATURE_COMPAT_CHECKSUM , 0 ,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT ) ;
else
jbd2_journal_clear_features ( osb - > journal - > j_journal ,
JBD2_FEATURE_COMPAT_CHECKSUM , 0 ,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT ) ;
2005-12-16 01:31:24 +03:00
if ( dirty ) {
/* recover my local alloc if we didn't unmount cleanly. */
status = ocfs2_begin_local_alloc_recovery ( osb ,
osb - > slot_num ,
& local_alloc ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto finally ;
}
/* we complete the recovery process after we've marked
* ourselves as mounted . */
}
status = ocfs2_load_local_alloc ( osb ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto finally ;
}
if ( dirty ) {
/* Recovery will be completed after we've mounted the
* rest of the volume . */
osb - > dirty = 1 ;
osb - > local_alloc_copy = local_alloc ;
local_alloc = NULL ;
}
/* go through each journal, trylock it and if you get the
* lock , and it ' s marked as dirty , set the bit in the recover
* map and launch a recovery thread for it . */
status = ocfs2_mark_dead_nodes ( osb ) ;
2009-03-07 01:21:46 +03:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto finally ;
}
status = ocfs2_compute_replay_slots ( osb ) ;
2005-12-16 01:31:24 +03:00
if ( status < 0 )
mlog_errno ( status ) ;
finally :
2013-02-22 04:42:44 +04:00
kfree ( local_alloc ) ;
2005-12-16 01:31:24 +03:00
2011-03-07 11:43:21 +03:00
if ( status )
mlog_errno ( status ) ;
2005-12-16 01:31:24 +03:00
return status ;
}
/*
* The routine gets called from dismount or close whenever a dismount on
* volume is requested and the osb open count becomes 1.
* It will remove the osb from the global list and also free up all the
* initialized resources and fileobject .
*/
static void ocfs2_delete_osb ( struct ocfs2_super * osb )
{
/* This function assumes that the caller has the main osb resource */
2008-02-01 22:59:09 +03:00
ocfs2_free_slot_info ( osb ) ;
2005-12-16 01:31:24 +03:00
2006-02-23 04:35:08 +03:00
kfree ( osb - > osb_orphan_wipes ) ;
2008-07-15 04:31:10 +04:00
kfree ( osb - > slot_recovery_generations ) ;
2005-12-16 01:31:24 +03:00
/* FIXME
* This belongs in journal shutdown , but because we have to
2010-06-11 14:17:00 +04:00
* allocate osb - > journal at the start of ocfs2_initialize_osb ( ) ,
2005-12-16 01:31:24 +03:00
* we free it here .
*/
kfree ( osb - > journal ) ;
2013-02-22 04:42:44 +04:00
kfree ( osb - > local_alloc_copy ) ;
2005-12-16 01:31:24 +03:00
kfree ( osb - > uuid_str ) ;
2014-09-26 03:05:11 +04:00
kfree ( osb - > vol_label ) ;
2005-12-16 01:31:24 +03:00
ocfs2_put_dlm_debug ( osb - > osb_dlm_debug ) ;
memset ( osb , 0 , sizeof ( struct ocfs2_super ) ) ;
}
2015-09-05 01:44:11 +03:00
/* Depending on the mount option passed, perform one of the following:
* Put OCFS2 into a readonly state ( default )
* Return EIO so that only the process errs
* Fix the error as if fsck . ocfs2 - y
* panic
*/
static int ocfs2_handle_error ( struct super_block * sb )
2005-12-16 01:31:24 +03:00
{
struct ocfs2_super * osb = OCFS2_SB ( sb ) ;
2015-09-05 01:44:11 +03:00
int rv = 0 ;
2005-12-16 01:31:24 +03:00
ocfs2_set_osb_flag ( osb , OCFS2_OSB_ERROR_FS ) ;
2015-09-05 01:44:11 +03:00
pr_crit ( " On-disk corruption discovered. "
" Please run fsck.ocfs2 once the filesystem is unmounted. \n " ) ;
2005-12-16 01:31:24 +03:00
2015-09-05 01:44:11 +03:00
if ( osb - > s_mount_opt & OCFS2_MOUNT_ERRORS_PANIC ) {
panic ( " OCFS2: (device %s): panic forced after error \n " ,
sb - > s_id ) ;
} else if ( osb - > s_mount_opt & OCFS2_MOUNT_ERRORS_CONT ) {
pr_crit ( " OCFS2: Returning error to the calling process. \n " ) ;
rv = - EIO ;
} else { /* default option */
rv = - EROFS ;
if ( sb - > s_flags & MS_RDONLY & &
( ocfs2_is_soft_readonly ( osb ) | |
ocfs2_is_hard_readonly ( osb ) ) )
return rv ;
pr_crit ( " OCFS2: File system is now read-only. \n " ) ;
sb - > s_flags | = MS_RDONLY ;
ocfs2_set_ro_flag ( osb , 0 ) ;
}
return rv ;
2005-12-16 01:31:24 +03:00
}
2015-09-05 01:44:11 +03:00
int __ocfs2_error ( struct super_block * sb , const char * function ,
2015-04-15 01:43:46 +03:00
const char * fmt , . . . )
2005-12-16 01:31:24 +03:00
{
2015-04-15 01:43:46 +03:00
struct va_format vaf ;
2005-12-16 01:31:24 +03:00
va_list args ;
va_start ( args , fmt ) ;
2015-04-15 01:43:46 +03:00
vaf . fmt = fmt ;
vaf . va = & args ;
2005-12-16 01:31:24 +03:00
/* Not using mlog here because we want to show the actual
* function the error came from . */
2015-09-05 01:44:51 +03:00
printk ( KERN_CRIT " OCFS2: ERROR (device %s): %s: %pV " ,
2015-04-15 01:43:46 +03:00
sb - > s_id , function , & vaf ) ;
va_end ( args ) ;
2005-12-16 01:31:24 +03:00
2015-09-05 01:44:11 +03:00
return ocfs2_handle_error ( sb ) ;
2005-12-16 01:31:24 +03:00
}
/* Handle critical errors. This is intentionally more drastic than
* ocfs2_handle_error , so we only use for things like journal errors ,
* etc . */
2015-04-15 01:43:46 +03:00
void __ocfs2_abort ( struct super_block * sb , const char * function ,
2005-12-16 01:31:24 +03:00
const char * fmt , . . . )
{
2015-04-15 01:43:46 +03:00
struct va_format vaf ;
2005-12-16 01:31:24 +03:00
va_list args ;
va_start ( args , fmt ) ;
2015-04-15 01:43:46 +03:00
vaf . fmt = fmt ;
vaf . va = & args ;
2015-09-05 01:44:51 +03:00
printk ( KERN_CRIT " OCFS2: abort (device %s): %s: %pV " ,
2015-04-15 01:43:46 +03:00
sb - > s_id , function , & vaf ) ;
va_end ( args ) ;
2005-12-16 01:31:24 +03:00
/* We don't have the cluster support yet to go straight to
* hard readonly in here . Until then , we want to keep
* ocfs2_abort ( ) so that we can at least mark critical
* errors .
*
* TODO : This should abort the journal and alert other nodes
* that our slot needs recovery . */
/* Force a panic(). This stinks, but it's better than letting
* things continue without having a proper hard readonly
* here . */
2009-08-20 02:16:01 +04:00
if ( ! ocfs2_mount_local ( OCFS2_SB ( sb ) ) )
OCFS2_SB ( sb ) - > s_mount_opt | = OCFS2_MOUNT_ERRORS_PANIC ;
2005-12-16 01:31:24 +03:00
ocfs2_handle_error ( sb ) ;
}
2009-09-03 04:17:36 +04:00
/*
 * Block every signal for the current task, saving the previous mask in
 * @oldset so that ocfs2_unblock_signals() can restore it.
 *
 * Returns nothing: in-kernel sigprocmask() only fails when SIG_* is wrong,
 * so a failure is treated as a bug.
 */
void ocfs2_block_signals(sigset_t *oldset)
{
	sigset_t all_signals;
	int err;

	sigfillset(&all_signals);

	err = sigprocmask(SIG_BLOCK, &all_signals, oldset);
	BUG_ON(err);
}
/*
 * Install @oldset as the current signal mask.  A sigprocmask() failure is
 * treated as a bug.
 */
void ocfs2_unblock_signals(sigset_t *oldset)
{
	int err;

	err = sigprocmask(SIG_SETMASK, oldset, NULL);
	BUG_ON(err);
}
2005-12-16 01:31:24 +03:00
/* Hand ocfs2_init and ocfs2_exit to the module_init()/module_exit() hooks. */
module_init ( ocfs2_init ) ;
module_exit ( ocfs2_exit ) ;