2019-05-24 12:04:05 +02:00
// SPDX-License-Identifier: GPL-2.0-or-later
2021-05-06 18:06:44 -07:00
/*
2005-12-15 14:31:24 -08:00
* localalloc . c
*
* Node local data allocation
*
* Copyright ( C ) 2002 , 2004 Oracle . All rights reserved .
*/
# include <linux/fs.h>
# include <linux/types.h>
# include <linux/slab.h>
# include <linux/highmem.h>
# include <linux/bitops.h>
# include <cluster/masklog.h>
# include "ocfs2.h"
# include "alloc.h"
2008-10-17 19:25:01 -07:00
# include "blockcheck.h"
2005-12-15 14:31:24 -08:00
# include "dlmglue.h"
# include "inode.h"
# include "journal.h"
# include "localalloc.h"
# include "suballoc.h"
# include "super.h"
# include "sysfile.h"
2011-02-22 07:56:45 +08:00
# include "ocfs2_trace.h"
2005-12-15 14:31:24 -08:00
# include "buffer_head_io.h"
# define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
static u32 ocfs2_local_alloc_count_bits ( struct ocfs2_dinode * alloc ) ;
static int ocfs2_local_alloc_find_clear_bits ( struct ocfs2_super * osb ,
struct ocfs2_dinode * alloc ,
2009-12-07 13:10:48 -08:00
u32 * numbits ,
struct ocfs2_alloc_reservation * resv ) ;
2005-12-15 14:31:24 -08:00
static void ocfs2_clear_local_alloc ( struct ocfs2_dinode * alloc ) ;
static int ocfs2_sync_local_to_main ( struct ocfs2_super * osb ,
2006-10-09 18:11:45 -07:00
handle_t * handle ,
2005-12-15 14:31:24 -08:00
struct ocfs2_dinode * alloc ,
struct inode * main_bm_inode ,
struct buffer_head * main_bm_bh ) ;
static int ocfs2_local_alloc_reserve_for_window ( struct ocfs2_super * osb ,
struct ocfs2_alloc_context * * ac ,
struct inode * * bitmap_inode ,
struct buffer_head * * bitmap_bh ) ;
static int ocfs2_local_alloc_new_window ( struct ocfs2_super * osb ,
2006-10-09 18:11:45 -07:00
handle_t * handle ,
2005-12-15 14:31:24 -08:00
struct ocfs2_alloc_context * ac ) ;
static int ocfs2_local_alloc_slide_window ( struct ocfs2_super * osb ,
struct inode * local_alloc_inode ) ;
2010-04-05 18:17:14 -07:00
/*
 * ocfs2_la_default_mb() - determine a default size, in megabytes, of
 * the local alloc.
 *
 * Generally, we'd like to pick as large a local alloc as
 * possible. Performance on large workloads tends to scale
 * proportionally to la size. In addition to that, the reservations
 * code functions more efficiently as it can reserve more windows for
 * write.
 *
 * Some things work against us when trying to choose a large local alloc:
 *
 * - We need to ensure our sizing is picked to leave enough space in
 *   group descriptors for other allocations (such as block groups,
 *   etc). Picking default sizes which are a multiple of 4 could help
 *   - block groups are allocated in 2MB and 4MB chunks.
 *
 * - Likewise, we don't want to starve other nodes of bits on small
 *   file systems. This can easily be taken care of by limiting our
 *   default to a reasonable size (256M) on larger cluster sizes.
 *
 * - Some file systems can't support very large sizes - 4K and 8K
 *   cluster sizes in particular are limited to less than 128 and 256
 *   megabytes respectively.
 *
 * The following reference table shows group descriptor and local
 * alloc maximums at various cluster sizes (4K blocksize):
 *
 * csize: 4K	group: 126M	la: 121M
 * csize: 8K	group: 252M	la: 243M
 * csize: 16K	group: 504M	la: 486M
 * csize: 32K	group: 1008M	la: 972M
 * csize: 64K	group: 2016M	la: 1944M
 * csize: 128K	group: 4032M	la: 3888M
 * csize: 256K	group: 8064M	la: 7776M
 * csize: 512K	group: 16128M	la: 15552M
 * csize: 1024K	group: 32256M	la: 31104M
 */
# define OCFS2_LA_MAX_DEFAULT_MB 256
# define OCFS2_LA_OLD_DEFAULT 8
unsigned int ocfs2_la_default_mb ( struct ocfs2_super * osb )
{
unsigned int la_mb ;
unsigned int gd_mb ;
2010-06-09 16:43:05 +08:00
unsigned int la_max_mb ;
2010-04-05 18:17:14 -07:00
unsigned int megs_per_slot ;
struct super_block * sb = osb - > sb ;
gd_mb = ocfs2_clusters_to_megabytes ( osb - > sb ,
2010-04-13 14:38:06 +08:00
8 * ocfs2_group_bitmap_size ( sb , 0 , osb - > s_feature_incompat ) ) ;
2010-04-05 18:17:14 -07:00
/*
* This takes care of files systems with very small group
* descriptors - 512 byte blocksize at cluster sizes lower
* than 16 K and also 1 k blocksize with 4 k cluster size .
*/
if ( ( sb - > s_blocksize = = 512 & & osb - > s_clustersize < = 8192 )
| | ( sb - > s_blocksize = = 1024 & & osb - > s_clustersize = = 4096 ) )
return OCFS2_LA_OLD_DEFAULT ;
/*
* Leave enough room for some block groups and make the final
* value we work from a multiple of 4.
*/
gd_mb - = 16 ;
gd_mb & = 0xFFFFFFFB ;
la_mb = gd_mb ;
/*
* Keep window sizes down to a reasonable default
*/
if ( la_mb > OCFS2_LA_MAX_DEFAULT_MB ) {
/*
* Some clustersize / blocksize combinations will have
* given us a larger than OCFS2_LA_MAX_DEFAULT_MB
* default size , but get poor distribution when
* limited to exactly 256 megabytes .
*
* As an example , 16 K clustersize at 4 K blocksize
* gives us a cluster group size of 504 M . Paring the
* local alloc size down to 256 however , would give us
* only one window and around 200 MB left in the
* cluster group . Instead , find the first size below
* 256 which would give us an even distribution .
*
* Larger cluster group sizes actually work out pretty
* well when pared to 256 , so we don ' t have to do this
* for any group that fits more than two
* OCFS2_LA_MAX_DEFAULT_MB windows .
*/
if ( gd_mb > ( 2 * OCFS2_LA_MAX_DEFAULT_MB ) )
la_mb = 256 ;
else {
unsigned int gd_mult = gd_mb ;
while ( gd_mult > 256 )
gd_mult = gd_mult > > 1 ;
la_mb = gd_mult ;
}
}
megs_per_slot = osb - > osb_clusters_at_boot / osb - > max_slots ;
megs_per_slot = ocfs2_clusters_to_megabytes ( osb - > sb , megs_per_slot ) ;
/* Too many nodes, too few disk clusters. */
if ( megs_per_slot < la_mb )
la_mb = megs_per_slot ;
2010-06-09 16:43:05 +08:00
/* We can't store more bits than we can in a block. */
la_max_mb = ocfs2_clusters_to_megabytes ( osb - > sb ,
ocfs2_local_alloc_size ( sb ) * 8 ) ;
if ( la_mb > la_max_mb )
la_mb = la_max_mb ;
2010-04-05 18:17:14 -07:00
return la_mb ;
}
2010-04-05 18:17:13 -07:00
void ocfs2_la_set_sizes ( struct ocfs2_super * osb , int requested_mb )
{
struct super_block * sb = osb - > sb ;
2010-04-05 18:17:14 -07:00
unsigned int la_default_mb = ocfs2_la_default_mb ( osb ) ;
2010-04-05 18:17:13 -07:00
unsigned int la_max_mb ;
la_max_mb = ocfs2_clusters_to_megabytes ( sb ,
ocfs2_local_alloc_size ( sb ) * 8 ) ;
2011-02-22 07:56:45 +08:00
trace_ocfs2_la_set_sizes ( requested_mb , la_max_mb , la_default_mb ) ;
2010-04-05 18:17:13 -07:00
if ( requested_mb = = - 1 ) {
/* No user request - use defaults */
osb - > local_alloc_default_bits =
ocfs2_megabytes_to_clusters ( sb , la_default_mb ) ;
} else if ( requested_mb > la_max_mb ) {
/* Request is too big, we give the maximum available */
osb - > local_alloc_default_bits =
ocfs2_megabytes_to_clusters ( sb , la_max_mb ) ;
} else {
osb - > local_alloc_default_bits =
ocfs2_megabytes_to_clusters ( sb , requested_mb ) ;
}
osb - > local_alloc_bits = osb - > local_alloc_default_bits ;
}
/*
 * ocfs2: throttle back local alloc when low on disk space
 *
 * Ocfs2's local allocator disables itself for the duration of a mount point
 * when it has trouble allocating a large enough area from the primary bitmap.
 * That can cause performance problems, especially for disks which were only
 * temporarily full or fragmented. This patch allows for the allocator to
 * shrink its window first, before being disabled. Later, it can also be
 * re-enabled so that any performance drop is minimized.
 *
 * To do this, we allow the value of osb->local_alloc_bits to be shrunk when
 * needed. The default value is recorded in a mostly read-only variable so that
 * we can re-initialize when required.
 *
 * Locking had to be updated so that we could protect changes to
 * local_alloc_bits. Mostly this involves protecting various local alloc values
 * with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
 * is used when the local allocator has shrunk, but is not disabled. If the
 * available space dips below 1 megabyte, the local alloc file is disabled. In
 * either case, local alloc is re-enabled 30 seconds after the event, or when
 * an appropriate amount of bits is seen in the primary bitmap.
 *
 * Signed-off-by: Mark Fasheh <mfasheh@suse.com>
 * 2008-07-28 18:02:53 -07:00
 */
static inline int ocfs2_la_state_enabled ( struct ocfs2_super * osb )
{
return ( osb - > local_alloc_state = = OCFS2_LA_THROTTLED | |
osb - > local_alloc_state = = OCFS2_LA_ENABLED ) ;
}
void ocfs2_local_alloc_seen_free_bits ( struct ocfs2_super * osb ,
unsigned int num_clusters )
{
spin_lock ( & osb - > osb_lock ) ;
if ( osb - > local_alloc_state = = OCFS2_LA_DISABLED | |
osb - > local_alloc_state = = OCFS2_LA_THROTTLED )
if ( num_clusters > = osb - > local_alloc_default_bits ) {
cancel_delayed_work ( & osb - > la_enable_wq ) ;
osb - > local_alloc_state = OCFS2_LA_ENABLED ;
}
spin_unlock ( & osb - > osb_lock ) ;
}
void ocfs2_la_enable_worker ( struct work_struct * work )
{
struct ocfs2_super * osb =
container_of ( work , struct ocfs2_super ,
la_enable_wq . work ) ;
spin_lock ( & osb - > osb_lock ) ;
osb - > local_alloc_state = OCFS2_LA_ENABLED ;
spin_unlock ( & osb - > osb_lock ) ;
}
2005-12-15 14:31:24 -08:00
/*
 * Tell us whether a given allocation should use the local alloc
 * file. Otherwise, it has to go to the main bitmap.
 *
 * Returns 1 when the local alloc is enabled (or throttled) and @bits
 * is no more than half of the current window, 0 otherwise.
 *
 * The window size and state are sampled under osb->osb_lock, but the
 * lock is dropped before returning, so the answer is only a snapshot.
 * NOTE(review): the earlier comment here said the values are re-checked
 * later under a mutex; that re-check is not visible in this function.
 */
int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
{
	int window_bits;
	int use_local;

	spin_lock(&osb->osb_lock);
	window_bits = osb->local_alloc_bits;

	/*
	 * la_bits should be at least twice the size (in clusters) of
	 * a new block group. We want to be sure block group
	 * allocations go through the local alloc, so allow an
	 * allocation to take up to half the bitmap.
	 */
	use_local = ocfs2_la_state_enabled(osb) &&
		    bits <= (window_bits / 2);

	trace_ocfs2_alloc_should_use_local(
	     (unsigned long long)bits, osb->local_alloc_state,
	     window_bits, use_local);

	spin_unlock(&osb->osb_lock);

	return use_local;
}
int ocfs2_load_local_alloc ( struct ocfs2_super * osb )
{
int status = 0 ;
struct ocfs2_dinode * alloc = NULL ;
struct buffer_head * alloc_bh = NULL ;
u32 num_used ;
struct inode * inode = NULL ;
struct ocfs2_local_alloc * la ;
2008-07-28 14:55:20 -07:00
if ( osb - > local_alloc_bits = = 0 )
2007-12-20 14:58:11 -08:00
goto bail ;
2008-07-28 14:55:20 -07:00
if ( osb - > local_alloc_bits > = osb - > bitmap_cpg ) {
2007-12-20 14:58:11 -08:00
mlog ( ML_NOTICE , " Requested local alloc window %d is larger "
" than max possible %u. Using defaults. \n " ,
2008-07-28 14:55:20 -07:00
osb - > local_alloc_bits , ( osb - > bitmap_cpg - 1 ) ) ;
osb - > local_alloc_bits =
ocfs2_megabytes_to_clusters ( osb - > sb ,
2010-04-05 18:17:14 -07:00
ocfs2_la_default_mb ( osb ) ) ;
2007-12-20 14:58:11 -08:00
}
2005-12-15 14:31:24 -08:00
/* read the alloc off disk */
inode = ocfs2_get_system_file_inode ( osb , LOCAL_ALLOC_SYSTEM_INODE ,
osb - > slot_num ) ;
if ( ! inode ) {
status = - EINVAL ;
mlog_errno ( status ) ;
goto bail ;
}
2008-11-13 14:49:11 -08:00
status = ocfs2_read_inode_block_full ( inode , & alloc_bh ,
OCFS2_BH_IGNORE_CACHE ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
alloc = ( struct ocfs2_dinode * ) alloc_bh - > b_data ;
la = OCFS2_LOCAL_ALLOC ( alloc ) ;
if ( ! ( le32_to_cpu ( alloc - > i_flags ) &
( OCFS2_LOCAL_ALLOC_FL | OCFS2_BITMAP_FL ) ) ) {
2006-03-03 10:24:33 -08:00
mlog ( ML_ERROR , " Invalid local alloc inode, %llu \n " ,
( unsigned long long ) OCFS2_I ( inode ) - > ip_blkno ) ;
2005-12-15 14:31:24 -08:00
status = - EINVAL ;
goto bail ;
}
if ( ( la - > la_size = = 0 ) | |
( le16_to_cpu ( la - > la_size ) > ocfs2_local_alloc_size ( inode - > i_sb ) ) ) {
mlog ( ML_ERROR , " Local alloc size is invalid (la_size = %u) \n " ,
le16_to_cpu ( la - > la_size ) ) ;
status = - EINVAL ;
goto bail ;
}
/* do a little verification. */
num_used = ocfs2_local_alloc_count_bits ( alloc ) ;
/* hopefully the local alloc has always been recovered before
* we load it . */
if ( num_used
| | alloc - > id1 . bitmap1 . i_used
| | alloc - > id1 . bitmap1 . i_total
ocfs2: fix panic due to unrecovered local alloc
mount.ocfs2 ignore the inconsistent error that journal is clean but
local alloc is unrecovered. After mount, local alloc not empty, then
reserver cluster didn't alloc a new local alloc window, reserveration
map is empty(ocfs2_reservation_map.m_bitmap_len = 0), that triggered the
following panic.
This issue was reported at
https://oss.oracle.com/pipermail/ocfs2-devel/2015-May/010854.html
and was advised to fixed during mount. But this is a very unusual
inconsistent state, usually journal dirty flag should be cleared at the
last stage of umount until every other things go right. We may need do
further debug to check that. Any way to avoid possible futher
corruption, mount should be abort and fsck should be run.
(mount.ocfs2,1765,1):ocfs2_load_local_alloc:353 ERROR: Local alloc hasn't been recovered!
found = 6518, set = 6518, taken = 8192, off = 15912372
ocfs2: Mounting device (202,64) on (node 0, slot 3) with ordered data mode.
o2dlm: Joining domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 8 ) 8 nodes
ocfs2: Mounting device (202,80) on (node 0, slot 3) with ordered data mode.
o2hb: Region 89CEAC63CC4F4D03AC185B44E0EE0F3F (xvdf) is now a quorum device
o2net: Accepted connection from node yvwsoa17p (num 7) at 172.22.77.88:7777
o2dlm: Node 7 joins domain 64FE421C8C984E6D96ED12C55FEE2435 ( 0 1 2 3 4 5 6 7 8 ) 9 nodes
o2dlm: Node 7 joins domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 7 8 ) 9 nodes
------------[ cut here ]------------
kernel BUG at fs/ocfs2/reservations.c:507!
invalid opcode: 0000 [#1] SMP
Modules linked in: ocfs2 rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs fscache lockd grace ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sunrpc ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 ovmapi ppdev parport_pc parport xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea acpi_cpufreq pcspkr i2c_piix4 i2c_core sg ext4 jbd2 mbcache2 sr_mod cdrom xen_blkfront pata_acpi ata_generic ata_piix floppy dm_mirror dm_region_hash dm_log dm_mod
CPU: 0 PID: 4349 Comm: startWebLogic.s Not tainted 4.1.12-124.19.2.el6uek.x86_64 #2
Hardware name: Xen HVM domU, BIOS 4.4.4OVM 09/06/2018
task: ffff8803fb04e200 ti: ffff8800ea4d8000 task.ti: ffff8800ea4d8000
RIP: 0010:[<ffffffffa05e96a8>] [<ffffffffa05e96a8>] __ocfs2_resv_find_window+0x498/0x760 [ocfs2]
Call Trace:
ocfs2_resmap_resv_bits+0x10d/0x400 [ocfs2]
ocfs2_claim_local_alloc_bits+0xd0/0x640 [ocfs2]
__ocfs2_claim_clusters+0x178/0x360 [ocfs2]
ocfs2_claim_clusters+0x1f/0x30 [ocfs2]
ocfs2_convert_inline_data_to_extents+0x634/0xa60 [ocfs2]
ocfs2_write_begin_nolock+0x1c6/0x1da0 [ocfs2]
ocfs2_write_begin+0x13e/0x230 [ocfs2]
generic_perform_write+0xbf/0x1c0
__generic_file_write_iter+0x19c/0x1d0
ocfs2_file_write_iter+0x589/0x1360 [ocfs2]
__vfs_write+0xb8/0x110
vfs_write+0xa9/0x1b0
SyS_write+0x46/0xb0
system_call_fastpath+0x18/0xd7
Code: ff ff 8b 75 b8 39 75 b0 8b 45 c8 89 45 98 0f 84 e5 fe ff ff 45 8b 74 24 18 41 8b 54 24 1c e9 56 fc ff ff 85 c0 0f 85 48 ff ff ff <0f> 0b 48 8b 05 cf c3 de ff 48 ba 00 00 00 00 00 00 00 10 48 85
RIP __ocfs2_resv_find_window+0x498/0x760 [ocfs2]
RSP <ffff8800ea4db668>
---[ end trace 566f07529f2edf3c ]---
Kernel panic - not syncing: Fatal exception
Kernel Offset: disabled
Link: http://lkml.kernel.org/r/20181121020023.3034-2-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Acked-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-12-28 00:32:50 -08:00
| | la - > la_bm_off ) {
mlog ( ML_ERROR , " inconsistent detected, clean journal with "
" unrecovered local alloc, please run fsck.ocfs2! \n "
2005-12-15 14:31:24 -08:00
" found = %u, set = %u, taken = %u, off = %u \n " ,
num_used , le32_to_cpu ( alloc - > id1 . bitmap1 . i_used ) ,
le32_to_cpu ( alloc - > id1 . bitmap1 . i_total ) ,
OCFS2_LOCAL_ALLOC ( alloc ) - > la_bm_off ) ;
ocfs2: fix panic due to unrecovered local alloc
mount.ocfs2 ignore the inconsistent error that journal is clean but
local alloc is unrecovered. After mount, local alloc not empty, then
reserver cluster didn't alloc a new local alloc window, reserveration
map is empty(ocfs2_reservation_map.m_bitmap_len = 0), that triggered the
following panic.
This issue was reported at
https://oss.oracle.com/pipermail/ocfs2-devel/2015-May/010854.html
and was advised to fixed during mount. But this is a very unusual
inconsistent state, usually journal dirty flag should be cleared at the
last stage of umount until every other things go right. We may need do
further debug to check that. Any way to avoid possible futher
corruption, mount should be abort and fsck should be run.
(mount.ocfs2,1765,1):ocfs2_load_local_alloc:353 ERROR: Local alloc hasn't been recovered!
found = 6518, set = 6518, taken = 8192, off = 15912372
ocfs2: Mounting device (202,64) on (node 0, slot 3) with ordered data mode.
o2dlm: Joining domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 8 ) 8 nodes
ocfs2: Mounting device (202,80) on (node 0, slot 3) with ordered data mode.
o2hb: Region 89CEAC63CC4F4D03AC185B44E0EE0F3F (xvdf) is now a quorum device
o2net: Accepted connection from node yvwsoa17p (num 7) at 172.22.77.88:7777
o2dlm: Node 7 joins domain 64FE421C8C984E6D96ED12C55FEE2435 ( 0 1 2 3 4 5 6 7 8 ) 9 nodes
o2dlm: Node 7 joins domain 89CEAC63CC4F4D03AC185B44E0EE0F3F ( 0 1 2 3 4 5 6 7 8 ) 9 nodes
------------[ cut here ]------------
kernel BUG at fs/ocfs2/reservations.c:507!
invalid opcode: 0000 [#1] SMP
Modules linked in: ocfs2 rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs fscache lockd grace ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sunrpc ipt_REJECT nf_reject_ipv4 nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 ovmapi ppdev parport_pc parport xen_netfront fb_sys_fops sysimgblt sysfillrect syscopyarea acpi_cpufreq pcspkr i2c_piix4 i2c_core sg ext4 jbd2 mbcache2 sr_mod cdrom xen_blkfront pata_acpi ata_generic ata_piix floppy dm_mirror dm_region_hash dm_log dm_mod
CPU: 0 PID: 4349 Comm: startWebLogic.s Not tainted 4.1.12-124.19.2.el6uek.x86_64 #2
Hardware name: Xen HVM domU, BIOS 4.4.4OVM 09/06/2018
task: ffff8803fb04e200 ti: ffff8800ea4d8000 task.ti: ffff8800ea4d8000
RIP: 0010:[<ffffffffa05e96a8>] [<ffffffffa05e96a8>] __ocfs2_resv_find_window+0x498/0x760 [ocfs2]
Call Trace:
ocfs2_resmap_resv_bits+0x10d/0x400 [ocfs2]
ocfs2_claim_local_alloc_bits+0xd0/0x640 [ocfs2]
__ocfs2_claim_clusters+0x178/0x360 [ocfs2]
ocfs2_claim_clusters+0x1f/0x30 [ocfs2]
ocfs2_convert_inline_data_to_extents+0x634/0xa60 [ocfs2]
ocfs2_write_begin_nolock+0x1c6/0x1da0 [ocfs2]
ocfs2_write_begin+0x13e/0x230 [ocfs2]
generic_perform_write+0xbf/0x1c0
__generic_file_write_iter+0x19c/0x1d0
ocfs2_file_write_iter+0x589/0x1360 [ocfs2]
__vfs_write+0xb8/0x110
vfs_write+0xa9/0x1b0
SyS_write+0x46/0xb0
system_call_fastpath+0x18/0xd7
Code: ff ff 8b 75 b8 39 75 b0 8b 45 c8 89 45 98 0f 84 e5 fe ff ff 45 8b 74 24 18 41 8b 54 24 1c e9 56 fc ff ff 85 c0 0f 85 48 ff ff ff <0f> 0b 48 8b 05 cf c3 de ff 48 ba 00 00 00 00 00 00 00 10 48 85
RIP __ocfs2_resv_find_window+0x498/0x760 [ocfs2]
RSP <ffff8800ea4db668>
---[ end trace 566f07529f2edf3c ]---
Kernel panic - not syncing: Fatal exception
Kernel Offset: disabled
Link: http://lkml.kernel.org/r/20181121020023.3034-2-junxiao.bi@oracle.com
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Acked-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: Mark Fasheh <mfasheh@versity.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-12-28 00:32:50 -08:00
status = - EINVAL ;
goto bail ;
}
2005-12-15 14:31:24 -08:00
osb - > local_alloc_bh = alloc_bh ;
osb - > local_alloc_state = OCFS2_LA_ENABLED ;
bail :
if ( status < 0 )
2008-10-07 14:25:16 -07:00
brelse ( alloc_bh ) ;
2016-01-14 15:17:27 -08:00
iput ( inode ) ;
2005-12-15 14:31:24 -08:00
2011-02-22 07:56:45 +08:00
trace_ocfs2_load_local_alloc ( osb - > local_alloc_bits ) ;
2007-12-20 14:58:11 -08:00
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}
/*
* return any unused bits to the bitmap and write out a clean
* local_alloc .
*
* local_alloc_bh is optional . If not passed , we will simply use the
* one off osb . If you do pass it however , be warned that it * will * be
* returned brelse ' d and NULL ' d out . */
void ocfs2_shutdown_local_alloc ( struct ocfs2_super * osb )
{
int status ;
2006-10-09 18:11:45 -07:00
handle_t * handle ;
2005-12-15 14:31:24 -08:00
struct inode * local_alloc_inode = NULL ;
struct buffer_head * bh = NULL ;
struct buffer_head * main_bm_bh = NULL ;
struct inode * main_bm_inode = NULL ;
struct ocfs2_dinode * alloc_copy = NULL ;
struct ocfs2_dinode * alloc = NULL ;
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
cancel_delayed_work ( & osb - > la_enable_wq ) ;
ocfs2: fix panic due to ocfs2_wq is null
mount.ocfs2 failed when reading ocfs2 filesystem superblock encounters
an error. ocfs2_initialize_super() returns before allocating ocfs2_wq.
ocfs2_dismount_volume() triggers the following panic.
Oct 15 16:09:27 cnwarekv-205120 kernel: On-disk corruption discovered.Please run fsck.ocfs2 once the filesystem is unmounted.
Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_read_locked_inode:537 ERROR: status = -30
Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_init_global_system_inodes:458 ERROR: status = -30
Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_init_global_system_inodes:491 ERROR: status = -30
Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_initialize_super:2313 ERROR: status = -30
Oct 15 16:09:27 cnwarekv-205120 kernel: (mount.ocfs2,22804,44): ocfs2_fill_super:1033 ERROR: status = -30
------------[ cut here ]------------
Oops: 0002 [#1] SMP NOPTI
CPU: 1 PID: 11753 Comm: mount.ocfs2 Tainted: G E
4.14.148-200.ckv.x86_64 #1
Hardware name: Sugon H320-G30/35N16-US, BIOS 0SSDX017 12/21/2018
task: ffff967af0520000 task.stack: ffffa5f05484000
RIP: 0010:mutex_lock+0x19/0x20
Call Trace:
flush_workqueue+0x81/0x460
ocfs2_shutdown_local_alloc+0x47/0x440 [ocfs2]
ocfs2_dismount_volume+0x84/0x400 [ocfs2]
ocfs2_fill_super+0xa4/0x1270 [ocfs2]
? ocfs2_initialize_super.isa.211+0xf20/0xf20 [ocfs2]
mount_bdev+0x17f/0x1c0
mount_fs+0x3a/0x160
Link: http://lkml.kernel.org/r/1571139611-24107-1-git-send-email-yili@winhong.com
Signed-off-by: Yi Li <yilikernel@gmail.com>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-10-18 20:20:08 -07:00
if ( osb - > ocfs2_wq )
flush_workqueue ( osb - > ocfs2_wq ) ;
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
2005-12-15 14:31:24 -08:00
if ( osb - > local_alloc_state = = OCFS2_LA_UNUSED )
2006-10-05 15:42:08 -07:00
goto out ;
2005-12-15 14:31:24 -08:00
local_alloc_inode =
ocfs2_get_system_file_inode ( osb ,
LOCAL_ALLOC_SYSTEM_INODE ,
osb - > slot_num ) ;
if ( ! local_alloc_inode ) {
status = - ENOENT ;
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
goto out ;
2005-12-15 14:31:24 -08:00
}
osb - > local_alloc_state = OCFS2_LA_DISABLED ;
2009-12-07 13:10:48 -08:00
ocfs2_resmap_uninit ( & osb - > osb_la_resmap ) ;
2005-12-15 14:31:24 -08:00
main_bm_inode = ocfs2_get_system_file_inode ( osb ,
GLOBAL_BITMAP_SYSTEM_INODE ,
OCFS2_INVALID_SLOT ) ;
if ( ! main_bm_inode ) {
status = - EINVAL ;
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
goto out ;
2005-12-15 14:31:24 -08:00
}
2016-01-22 15:40:57 -05:00
inode_lock ( main_bm_inode ) ;
2006-10-05 15:42:08 -07:00
2007-10-18 15:30:42 -07:00
status = ocfs2_inode_lock ( main_bm_inode , & main_bm_bh , 1 ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
goto out_mutex ;
2005-12-15 14:31:24 -08:00
}
/* WINDOW_MOVE_CREDITS is a bit heavy... */
2006-10-09 17:26:22 -07:00
handle = ocfs2_start_trans ( osb , OCFS2_WINDOW_MOVE_CREDITS ) ;
2005-12-15 14:31:24 -08:00
if ( IS_ERR ( handle ) ) {
mlog_errno ( PTR_ERR ( handle ) ) ;
handle = NULL ;
2006-10-05 15:42:08 -07:00
goto out_unlock ;
2005-12-15 14:31:24 -08:00
}
bh = osb - > local_alloc_bh ;
alloc = ( struct ocfs2_dinode * ) bh - > b_data ;
2019-07-11 20:53:19 -07:00
alloc_copy = kmemdup ( alloc , bh - > b_size , GFP_NOFS ) ;
2005-12-15 14:31:24 -08:00
if ( ! alloc_copy ) {
status = - ENOMEM ;
2006-10-05 15:42:08 -07:00
goto out_commit ;
2005-12-15 14:31:24 -08:00
}
2009-02-12 16:41:25 -08:00
status = ocfs2_journal_access_di ( handle , INODE_CACHE ( local_alloc_inode ) ,
bh , OCFS2_JOURNAL_ACCESS_WRITE ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
goto out_commit ;
2005-12-15 14:31:24 -08:00
}
ocfs2_clear_local_alloc ( alloc ) ;
2010-03-19 14:13:52 -07:00
ocfs2_journal_dirty ( handle , bh ) ;
2005-12-15 14:31:24 -08:00
brelse ( bh ) ;
osb - > local_alloc_bh = NULL ;
osb - > local_alloc_state = OCFS2_LA_UNUSED ;
status = ocfs2_sync_local_to_main ( osb , handle , alloc_copy ,
main_bm_inode , main_bm_bh ) ;
if ( status < 0 )
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
out_commit :
2006-10-09 16:48:10 -07:00
ocfs2_commit_trans ( osb , handle ) ;
2005-12-15 14:31:24 -08:00
2006-10-05 15:42:08 -07:00
out_unlock :
2008-10-07 14:25:16 -07:00
brelse ( main_bm_bh ) ;
2005-12-15 14:31:24 -08:00
2007-10-18 15:30:42 -07:00
ocfs2_inode_unlock ( main_bm_inode , 1 ) ;
2005-12-15 14:31:24 -08:00
2006-10-05 15:42:08 -07:00
out_mutex :
2016-01-22 15:40:57 -05:00
inode_unlock ( main_bm_inode ) ;
2006-10-05 15:42:08 -07:00
iput ( main_bm_inode ) ;
out :
2016-01-14 15:17:27 -08:00
iput ( local_alloc_inode ) ;
2005-12-15 14:31:24 -08:00
2013-02-21 16:42:44 -08:00
kfree ( alloc_copy ) ;
2005-12-15 14:31:24 -08:00
}
/*
* We want to free the bitmap bits outside of any recovery context as
* we ' ll need a cluster lock to do so , but we must clear the local
* alloc before giving up the recovered nodes journal . To solve this ,
* we kmalloc a copy of the local alloc before it ' s change for the
* caller to process with ocfs2_complete_local_alloc_recovery
*/
int ocfs2_begin_local_alloc_recovery ( struct ocfs2_super * osb ,
int slot_num ,
struct ocfs2_dinode * * alloc_copy )
{
int status = 0 ;
struct buffer_head * alloc_bh = NULL ;
struct inode * inode = NULL ;
struct ocfs2_dinode * alloc ;
2011-02-22 07:56:45 +08:00
trace_ocfs2_begin_local_alloc_recovery ( slot_num ) ;
2005-12-15 14:31:24 -08:00
* alloc_copy = NULL ;
inode = ocfs2_get_system_file_inode ( osb ,
LOCAL_ALLOC_SYSTEM_INODE ,
slot_num ) ;
if ( ! inode ) {
status = - EINVAL ;
mlog_errno ( status ) ;
goto bail ;
}
2016-01-22 15:40:57 -05:00
inode_lock ( inode ) ;
2005-12-15 14:31:24 -08:00
2008-11-13 14:49:11 -08:00
status = ocfs2_read_inode_block_full ( inode , & alloc_bh ,
OCFS2_BH_IGNORE_CACHE ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
* alloc_copy = kmalloc ( alloc_bh - > b_size , GFP_KERNEL ) ;
if ( ! ( * alloc_copy ) ) {
status = - ENOMEM ;
goto bail ;
}
memcpy ( ( * alloc_copy ) , alloc_bh - > b_data , alloc_bh - > b_size ) ;
alloc = ( struct ocfs2_dinode * ) alloc_bh - > b_data ;
ocfs2_clear_local_alloc ( alloc ) ;
2008-10-17 19:25:01 -07:00
ocfs2_compute_meta_ecc ( osb - > sb , alloc_bh - > b_data , & alloc - > i_check ) ;
2009-02-10 20:00:41 -08:00
status = ocfs2_write_block ( osb , alloc_bh , INODE_CACHE ( inode ) ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 )
mlog_errno ( status ) ;
bail :
2013-02-21 16:42:44 -08:00
if ( status < 0 ) {
2005-12-15 14:31:24 -08:00
kfree ( * alloc_copy ) ;
* alloc_copy = NULL ;
}
2008-10-07 14:25:16 -07:00
brelse ( alloc_bh ) ;
2005-12-15 14:31:24 -08:00
if ( inode ) {
2016-01-22 15:40:57 -05:00
inode_unlock ( inode ) ;
2005-12-15 14:31:24 -08:00
iput ( inode ) ;
}
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}
/*
* Step 2 : By now , we ' ve completed the journal recovery , we ' ve stamped
* a clean local alloc on disk and dropped the node out of the
* recovery map . Dlm locks will no longer stall , so lets clear out the
* main bitmap .
*/
int ocfs2_complete_local_alloc_recovery ( struct ocfs2_super * osb ,
struct ocfs2_dinode * alloc )
{
int status ;
2006-10-09 18:11:45 -07:00
handle_t * handle ;
2005-12-15 14:31:24 -08:00
struct buffer_head * main_bm_bh = NULL ;
2006-10-05 15:42:08 -07:00
struct inode * main_bm_inode ;
2005-12-15 14:31:24 -08:00
main_bm_inode = ocfs2_get_system_file_inode ( osb ,
GLOBAL_BITMAP_SYSTEM_INODE ,
OCFS2_INVALID_SLOT ) ;
if ( ! main_bm_inode ) {
status = - EINVAL ;
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
goto out ;
2005-12-15 14:31:24 -08:00
}
2016-01-22 15:40:57 -05:00
inode_lock ( main_bm_inode ) ;
2006-10-05 15:42:08 -07:00
2007-10-18 15:30:42 -07:00
status = ocfs2_inode_lock ( main_bm_inode , & main_bm_bh , 1 ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
goto out_mutex ;
2005-12-15 14:31:24 -08:00
}
2006-10-09 17:26:22 -07:00
handle = ocfs2_start_trans ( osb , OCFS2_WINDOW_MOVE_CREDITS ) ;
2005-12-15 14:31:24 -08:00
if ( IS_ERR ( handle ) ) {
status = PTR_ERR ( handle ) ;
handle = NULL ;
mlog_errno ( status ) ;
2006-10-05 15:42:08 -07:00
goto out_unlock ;
2005-12-15 14:31:24 -08:00
}
/* we want the bitmap change to be recorded on disk asap */
2006-10-09 18:11:45 -07:00
handle - > h_sync = 1 ;
2005-12-15 14:31:24 -08:00
status = ocfs2_sync_local_to_main ( osb , handle , alloc ,
main_bm_inode , main_bm_bh ) ;
if ( status < 0 )
mlog_errno ( status ) ;
2006-10-09 16:48:10 -07:00
ocfs2_commit_trans ( osb , handle ) ;
2006-10-05 15:42:08 -07:00
out_unlock :
2007-10-18 15:30:42 -07:00
ocfs2_inode_unlock ( main_bm_inode , 1 ) ;
2006-10-05 15:42:08 -07:00
out_mutex :
2016-01-22 15:40:57 -05:00
inode_unlock ( main_bm_inode ) ;
2005-12-15 14:31:24 -08:00
2008-10-07 14:25:16 -07:00
brelse ( main_bm_bh ) ;
2005-12-15 14:31:24 -08:00
2006-10-05 15:42:08 -07:00
iput ( main_bm_inode ) ;
2005-12-15 14:31:24 -08:00
2006-10-05 15:42:08 -07:00
out :
2008-03-05 16:11:46 +08:00
if ( ! status )
2010-01-25 14:11:06 +08:00
ocfs2_init_steal_slots ( osb ) ;
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}
/*
 * Make sure we've got at least bits_wanted contiguous bits in the
 * local alloc. You lose them when you drop i_rwsem.
 *
 * We will add ourselves to the transaction passed in, but may start
 * our own in order to shift windows.
 *
 * On success (return 0) the local alloc inode is left locked and
 * referenced, and @ac is filled in to point at it and at
 * osb->local_alloc_bh (with an extra buffer reference). On failure
 * the inode is unlocked and released and a negative errno is returned;
 * -ENOSPC means the local alloc cannot satisfy the request.
 */
int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
				   u32 bits_wanted,
				   struct ocfs2_alloc_context *ac)
{
	int status;
	struct ocfs2_dinode *alloc;
	struct inode *local_alloc_inode;
	unsigned int free_bits;

	BUG_ON(!ac);

	local_alloc_inode =
		ocfs2_get_system_file_inode(osb,
					    LOCAL_ALLOC_SYSTEM_INODE,
					    osb->slot_num);
	if (!local_alloc_inode) {
		status = -ENOENT;
		mlog_errno(status);
		goto bail;
	}

	inode_lock(local_alloc_inode);

	/*
	 * We must double check state and allocator bits because
	 * another process may have changed them while holding i_rwsem.
	 */
	spin_lock(&osb->osb_lock);
	if (!ocfs2_la_state_enabled(osb) ||
	    (bits_wanted > osb->local_alloc_bits)) {
		spin_unlock(&osb->osb_lock);
		status = -ENOSPC;
		goto bail;
	}
	spin_unlock(&osb->osb_lock);

	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;

#ifdef CONFIG_OCFS2_DEBUG_FS
	if (le32_to_cpu(alloc->id1.bitmap1.i_used) !=
	    ocfs2_local_alloc_count_bits(alloc)) {
		status = ocfs2_error(osb->sb, "local alloc inode %llu says it has %u used bits, but a count shows %u\n",
				(unsigned long long)le64_to_cpu(alloc->i_blkno),
				le32_to_cpu(alloc->id1.bitmap1.i_used),
				ocfs2_local_alloc_count_bits(alloc));
		goto bail;
	}
#endif

	free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
		le32_to_cpu(alloc->id1.bitmap1.i_used);
	if (bits_wanted > free_bits) {
		/* uhoh, window change time. */
		status =
			ocfs2_local_alloc_slide_window(osb, local_alloc_inode);
		if (status < 0) {
			if (status != -ENOSPC)
				mlog_errno(status);
			goto bail;
		}

		/*
		 * Under certain conditions, the window slide code
		 * might have reduced the number of bits available or
		 * disabled the local alloc entirely. Re-check
		 * here and return -ENOSPC if necessary.
		 */
		status = -ENOSPC;
		if (!ocfs2_la_state_enabled(osb))
			goto bail;

		free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
			le32_to_cpu(alloc->id1.bitmap1.i_used);
		if (bits_wanted > free_bits)
			goto bail;
	}

	ac->ac_inode = local_alloc_inode;
	/* We should never use localalloc from another slot */
	ac->ac_alloc_slot = osb->slot_num;
	ac->ac_which = OCFS2_AC_USE_LOCAL;
	get_bh(osb->local_alloc_bh);
	ac->ac_bh = osb->local_alloc_bh;
	status = 0;
bail:
	/* Only the failure paths drop the inode lock and reference. */
	if (status < 0 && local_alloc_inode) {
		inode_unlock(local_alloc_inode);
		iput(local_alloc_inode);
	}

	trace_ocfs2_reserve_local_alloc_bits(
		(unsigned long long)ac->ac_max_block,
		bits_wanted, osb->slot_num, status);

	if (status)
		mlog_errno(status);
	return status;
}
int ocfs2_claim_local_alloc_bits ( struct ocfs2_super * osb ,
2006-10-09 18:11:45 -07:00
handle_t * handle ,
2005-12-15 14:31:24 -08:00
struct ocfs2_alloc_context * ac ,
2007-09-16 20:10:16 -07:00
u32 bits_wanted ,
2005-12-15 14:31:24 -08:00
u32 * bit_off ,
u32 * num_bits )
{
int status , start ;
struct inode * local_alloc_inode ;
void * bitmap ;
struct ocfs2_dinode * alloc ;
struct ocfs2_local_alloc * la ;
BUG_ON ( ac - > ac_which ! = OCFS2_AC_USE_LOCAL ) ;
local_alloc_inode = ac - > ac_inode ;
alloc = ( struct ocfs2_dinode * ) osb - > local_alloc_bh - > b_data ;
la = OCFS2_LOCAL_ALLOC ( alloc ) ;
2009-12-07 13:10:48 -08:00
start = ocfs2_local_alloc_find_clear_bits ( osb , alloc , & bits_wanted ,
ac - > ac_resv ) ;
2005-12-15 14:31:24 -08:00
if ( start = = - 1 ) {
/* TODO: Shouldn't we just BUG here? */
status = - ENOSPC ;
mlog_errno ( status ) ;
goto bail ;
}
bitmap = la - > la_bitmap ;
* bit_off = le32_to_cpu ( la - > la_bm_off ) + start ;
* num_bits = bits_wanted ;
2009-02-12 16:41:25 -08:00
status = ocfs2_journal_access_di ( handle ,
INODE_CACHE ( local_alloc_inode ) ,
2008-10-17 19:25:01 -07:00
osb - > local_alloc_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
2009-12-07 13:10:48 -08:00
ocfs2_resmap_claimed_bits ( & osb - > osb_la_resmap , ac - > ac_resv , start ,
bits_wanted ) ;
2005-12-15 14:31:24 -08:00
while ( bits_wanted - - )
ocfs2_set_bit ( start + + , bitmap ) ;
2008-02-13 00:06:18 +01:00
le32_add_cpu ( & alloc - > id1 . bitmap1 . i_used , * num_bits ) ;
2010-03-19 14:13:52 -07:00
ocfs2_journal_dirty ( handle , osb - > local_alloc_bh ) ;
2005-12-15 14:31:24 -08:00
bail :
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}
2014-02-06 12:04:20 -08:00
/*
 * Give num_bits bits starting at bit_off back to the local alloc
 * window.
 *
 * bit_off is converted to a window-relative offset by subtracting
 * la_bm_off. The bits are cleared in the local bitmap, i_used is
 * reduced by num_bits, and the buffer is journalled under @handle.
 *
 * Returns 0 on success or the error from ocfs2_journal_access_di().
 */
int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
				handle_t *handle,
				struct ocfs2_alloc_context *ac,
				u32 bit_off,
				u32 num_bits)
{
	int status, start;
	u32 clear_bits;
	struct inode *local_alloc_inode;
	void *bitmap;
	struct ocfs2_dinode *alloc;
	struct ocfs2_local_alloc *la;

	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);

	local_alloc_inode = ac->ac_inode;
	alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
	la = OCFS2_LOCAL_ALLOC(alloc);

	bitmap = la->la_bitmap;
	start = bit_off - le32_to_cpu(la->la_bm_off);
	clear_bits = num_bits;

	status = ocfs2_journal_access_di(handle,
					 INODE_CACHE(local_alloc_inode),
					 osb->local_alloc_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	while (clear_bits--)
		ocfs2_clear_bit(start++, bitmap);

	/* Adding the negated count subtracts num_bits from i_used. */
	le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
	ocfs2_journal_dirty(handle, osb->local_alloc_bh);

bail:
	return status;
}
2005-12-15 14:31:24 -08:00
static u32 ocfs2_local_alloc_count_bits ( struct ocfs2_dinode * alloc )
{
2012-07-30 14:41:03 -07:00
u32 count ;
2005-12-15 14:31:24 -08:00
struct ocfs2_local_alloc * la = OCFS2_LOCAL_ALLOC ( alloc ) ;
2012-07-30 14:41:03 -07:00
count = memweight ( la - > la_bitmap , le16_to_cpu ( la - > la_size ) ) ;
2005-12-15 14:31:24 -08:00
2011-02-22 07:56:45 +08:00
trace_ocfs2_local_alloc_count_bits ( count ) ;
2005-12-15 14:31:24 -08:00
return count ;
}
/*
 * Find a run of clear bits in the local alloc window.
 *
 * @numbits: in: number of bits wanted; out: may be lowered to the
 *           number of bits the reservation code could provide.
 * @resv:    reservation to allocate through, or NULL to use a
 *           temporary reservation that is discarded before returning.
 *
 * Returns the window-relative offset of the first bit of the run, or
 * -1 if the window is empty (i_total == 0) or no run was found.
 */
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
				     struct ocfs2_dinode *alloc,
				     u32 *numbits,
				     struct ocfs2_alloc_reservation *resv)
{
	int numfound = 0, bitoff, left, startoff;
	int local_resv = 0;
	struct ocfs2_alloc_reservation r;
	void *bitmap = NULL;
	struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;

	if (!alloc->id1.bitmap1.i_total) {
		bitoff = -1;
		goto bail;
	}

	if (!resv) {
		local_resv = 1;
		ocfs2_resv_init_once(&r);
		ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
		resv = &r;
	}

	numfound = *numbits;
	if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
		if (numfound < *numbits)
			*numbits = numfound;
		goto bail;
	}

	/*
	 * Code error. While reservations are enabled, local
	 * allocation should _always_ go through them.
	 */
	BUG_ON(osb->osb_resv_level != 0);

	/*
	 * Reservations are disabled. Handle this the old way.
	 */

	bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;

	numfound = bitoff = startoff = 0;
	left = le32_to_cpu(alloc->id1.bitmap1.i_total);
	while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
		/* No zero bit left inside the window. */
		if (bitoff == left)
			break;

		/* Ok, we found a zero bit... is it contig. or do we
		 * start over? */
		if (bitoff == startoff) {
			/* we found a zero */
			numfound++;
			startoff++;
		} else {
			/* got a zero after some ones */
			numfound = 1;
			startoff = bitoff + 1;
		}
		/* we got everything we needed */
		if (numfound == *numbits)
			break;
	}

	trace_ocfs2_local_alloc_find_clear_bits_search_bitmap(bitoff, numfound);

	/* startoff is one past the run, so step back to its first bit. */
	if (numfound == *numbits)
		bitoff = startoff - numfound;
	else
		bitoff = -1;

bail:
	if (local_resv)
		ocfs2_resv_discard(resmap, resv);

	trace_ocfs2_local_alloc_find_clear_bits(*numbits,
		le32_to_cpu(alloc->id1.bitmap1.i_total),
		bitoff, numfound);

	return bitoff;
}
static void ocfs2_clear_local_alloc ( struct ocfs2_dinode * alloc )
{
struct ocfs2_local_alloc * la = OCFS2_LOCAL_ALLOC ( alloc ) ;
int i ;
alloc - > id1 . bitmap1 . i_total = 0 ;
alloc - > id1 . bitmap1 . i_used = 0 ;
la - > la_bm_off = 0 ;
for ( i = 0 ; i < le16_to_cpu ( la - > la_size ) ; i + + )
la - > la_bitmap [ i ] = 0 ;
}
#if 0
/* turn this on and uncomment below to aid debugging window shifts. */
/*
 * Debug helper: BUG() if any of the count bits starting at start is
 * set in bitmap, after printing the range and the offending bit.
 */
static void ocfs2_verify_zero_bits(unsigned long *bitmap,
				   unsigned int start,
				   unsigned int count)
{
	unsigned int tmp = count;
	while (tmp--) {
		if (ocfs2_test_bit(start + tmp, bitmap)) {
			printk("ocfs2_verify_zero_bits: start = %u, count = "
			       "%u\n", start, count);
			printk("ocfs2_verify_zero_bits: bit %u is set!",
			       start + tmp);
			BUG();
		}
	}
}
#endif
/*
* sync the local alloc to main bitmap .
*
* assumes you ' ve already locked the main bitmap - - the bitmap inode
* passed is used for caching .
*/
static int ocfs2_sync_local_to_main ( struct ocfs2_super * osb ,
2006-10-09 18:11:45 -07:00
handle_t * handle ,
2005-12-15 14:31:24 -08:00
struct ocfs2_dinode * alloc ,
struct inode * main_bm_inode ,
struct buffer_head * main_bm_bh )
{
int status = 0 ;
int bit_off , left , count , start ;
u64 la_start_blk ;
u64 blkno ;
void * bitmap ;
struct ocfs2_local_alloc * la = OCFS2_LOCAL_ALLOC ( alloc ) ;
2011-02-22 07:56:45 +08:00
trace_ocfs2_sync_local_to_main (
2011-02-21 11:10:44 +08:00
le32_to_cpu ( alloc - > id1 . bitmap1 . i_total ) ,
le32_to_cpu ( alloc - > id1 . bitmap1 . i_used ) ) ;
2005-12-15 14:31:24 -08:00
if ( ! alloc - > id1 . bitmap1 . i_total ) {
goto bail ;
}
if ( le32_to_cpu ( alloc - > id1 . bitmap1 . i_used ) = =
le32_to_cpu ( alloc - > id1 . bitmap1 . i_total ) ) {
goto bail ;
}
la_start_blk = ocfs2_clusters_to_blocks ( osb - > sb ,
le32_to_cpu ( la - > la_bm_off ) ) ;
bitmap = la - > la_bitmap ;
2023-06-22 11:27:36 +01:00
start = count = 0 ;
2005-12-15 14:31:24 -08:00
left = le32_to_cpu ( alloc - > id1 . bitmap1 . i_total ) ;
while ( ( bit_off = ocfs2_find_next_zero_bit ( bitmap , left , start ) )
! = - 1 ) {
if ( ( bit_off < left ) & & ( bit_off = = start ) ) {
count + + ;
start + + ;
continue ;
}
if ( count ) {
blkno = la_start_blk +
ocfs2_clusters_to_blocks ( osb - > sb ,
start - count ) ;
2011-02-22 07:56:45 +08:00
trace_ocfs2_sync_local_to_main_free (
2006-03-03 10:24:33 -08:00
count , start - count ,
( unsigned long long ) la_start_blk ,
( unsigned long long ) blkno ) ;
2005-12-15 14:31:24 -08:00
2010-03-11 18:31:09 -08:00
status = ocfs2_release_clusters ( handle ,
main_bm_inode ,
main_bm_bh , blkno ,
count ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
}
if ( bit_off > = left )
break ;
count = 1 ;
start = bit_off + 1 ;
}
bail :
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink its window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
/*
 * Events which can trigger a recalculation of the local alloc window
 * size.
 */
enum ocfs2_la_event {
	/* Normal window slide. */
	OCFS2_LA_EVENT_SLIDE,

	/*
	 * The global bitmap theoretically has enough free bits, but no
	 * contiguous allocation could be found.
	 */
	OCFS2_LA_EVENT_FRAGMENTED,

	/*
	 * The global bitmap does not have enough free bits to satisfy
	 * our request.
	 */
	OCFS2_LA_EVENT_ENOSPC,
};
/*
 * Delay (30 seconds worth of HZ ticks) handed to queue_delayed_work()
 * when ocfs2_recalc_la_window() schedules osb->la_enable_wq after the
 * local alloc window has been throttled or disabled.
 */
# define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
/*
* Given an event , calculate the size of our next local alloc window .
*
2022-03-22 14:38:45 -07:00
* This should always be called under i_rwsem of the local alloc inode
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
* so that local alloc disabling doesn ' t race with processes trying to
* use the allocator .
*
* Returns the state which the local alloc was left in . This value can
* be ignored by some paths .
*/
static int ocfs2_recalc_la_window ( struct ocfs2_super * osb ,
enum ocfs2_la_event event )
{
unsigned int bits ;
int state ;
spin_lock ( & osb - > osb_lock ) ;
if ( osb - > local_alloc_state = = OCFS2_LA_DISABLED ) {
WARN_ON_ONCE ( osb - > local_alloc_state = = OCFS2_LA_DISABLED ) ;
goto out_unlock ;
}
/*
* ENOSPC and fragmentation are treated similarly for now .
*/
if ( event = = OCFS2_LA_EVENT_ENOSPC | |
event = = OCFS2_LA_EVENT_FRAGMENTED ) {
/*
* We ran out of contiguous space in the primary
* bitmap . Drastically reduce the number of bits used
* by local alloc until we have to disable it .
*/
bits = osb - > local_alloc_bits > > 1 ;
if ( bits > ocfs2_megabytes_to_clusters ( osb - > sb , 1 ) ) {
/*
* By setting state to THROTTLED , we ' ll keep
* the number of local alloc bits used down
* until an event occurs which would give us
* reason to assume the bitmap situation might
* have changed .
*/
osb - > local_alloc_state = OCFS2_LA_THROTTLED ;
osb - > local_alloc_bits = bits ;
} else {
osb - > local_alloc_state = OCFS2_LA_DISABLED ;
}
ocfs2: fix occurring deadlock by changing ocfs2_wq from global to local
This patch fixes a deadlock, as follows:
Node 1 Node 2 Node 3
1)volume a and b are only mount vol a only mount vol b
mounted
2) start to mount b start to mount a
3) check hb of Node 3 check hb of Node 2
in vol a, qs_holds++ in vol b, qs_holds++
4) -------------------- all nodes' network down --------------------
5) progress of mount b the same situation as
failed, and then call Node 2
ocfs2_dismount_volume.
but the process is hung,
since there is a work
in ocfs2_wq cannot beo
completed. This work is
about vol a, because
ocfs2_wq is global wq.
BTW, this work which is
scheduled in ocfs2_wq is
ocfs2_orphan_scan_work,
and the context in this work
needs to take inode lock
of orphan_dir, because
lockres owner are Node 1 and
all nodes' nework has been down
at the same time, so it can't
get the inode lock.
6) Why can't this node be fenced
when network disconnected?
Because the process of
mount is hung what caused qs_holds
is not equal 0.
Because all works in the ocfs2_wq are relative to the super block.
The solution is to change the ocfs2_wq from global to local. In other
words, move it into struct ocfs2_super.
Signed-off-by: Yiwen Jiang <jiangyiwen@huawei.com>
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
Cc: Xue jiufei <xuejiufei@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Cc: Junxiao Bi <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-03-25 14:21:32 -07:00
queue_delayed_work ( osb - > ocfs2_wq , & osb - > la_enable_wq ,
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
OCFS2_LA_ENABLE_INTERVAL ) ;
goto out_unlock ;
}
/*
* Don ' t increase the size of the local alloc window until we
* know we might be able to fulfill the request . Otherwise , we
* risk bouncing around the global bitmap during periods of
* low space .
*/
if ( osb - > local_alloc_state ! = OCFS2_LA_THROTTLED )
osb - > local_alloc_bits = osb - > local_alloc_default_bits ;
out_unlock :
state = osb - > local_alloc_state ;
spin_unlock ( & osb - > osb_lock ) ;
return state ;
}
2005-12-15 14:31:24 -08:00
static int ocfs2_local_alloc_reserve_for_window ( struct ocfs2_super * osb ,
struct ocfs2_alloc_context * * ac ,
struct inode * * bitmap_inode ,
struct buffer_head * * bitmap_bh )
{
int status ;
2006-12-13 00:34:52 -08:00
* ac = kzalloc ( sizeof ( struct ocfs2_alloc_context ) , GFP_KERNEL ) ;
2005-12-15 14:31:24 -08:00
if ( ! ( * ac ) ) {
status = - ENOMEM ;
mlog_errno ( status ) ;
goto bail ;
}
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
retry_enospc :
2013-09-11 14:19:47 -07:00
( * ac ) - > ac_bits_wanted = osb - > local_alloc_bits ;
2005-12-15 14:31:24 -08:00
status = ocfs2_reserve_cluster_bitmap_bits ( osb , * ac ) ;
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
if ( status = = - ENOSPC ) {
if ( ocfs2_recalc_la_window ( osb , OCFS2_LA_EVENT_ENOSPC ) = =
OCFS2_LA_DISABLED )
goto bail ;
ocfs2_free_ac_resource ( * ac ) ;
memset ( * ac , 0 , sizeof ( struct ocfs2_alloc_context ) ) ;
goto retry_enospc ;
}
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
goto bail ;
}
* bitmap_inode = ( * ac ) - > ac_inode ;
igrab ( * bitmap_inode ) ;
* bitmap_bh = ( * ac ) - > ac_bh ;
get_bh ( * bitmap_bh ) ;
status = 0 ;
bail :
if ( ( status < 0 ) & & * ac ) {
ocfs2_free_alloc_context ( * ac ) ;
* ac = NULL ;
}
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}
/*
* pass it the bitmap lock in lock_bh if you have it .
*/
static int ocfs2_local_alloc_new_window ( struct ocfs2_super * osb ,
2006-10-09 18:11:45 -07:00
handle_t * handle ,
2005-12-15 14:31:24 -08:00
struct ocfs2_alloc_context * ac )
{
int status = 0 ;
u32 cluster_off , cluster_count ;
struct ocfs2_dinode * alloc = NULL ;
struct ocfs2_local_alloc * la ;
alloc = ( struct ocfs2_dinode * ) osb - > local_alloc_bh - > b_data ;
la = OCFS2_LOCAL_ALLOC ( alloc ) ;
2011-02-22 07:56:45 +08:00
trace_ocfs2_local_alloc_new_window (
le32_to_cpu ( alloc - > id1 . bitmap1 . i_total ) ,
osb - > local_alloc_bits ) ;
2006-06-05 16:41:00 -04:00
/* Instruct the allocation code to try the most recently used
* cluster group . We ' ll re - record the group used this pass
* below . */
ac - > ac_last_group = osb - > la_last_gd ;
2005-12-15 14:31:24 -08:00
/* we used the generic suballoc reserve function, but we set
* everything up nicely , so there ' s no reason why we can ' t use
* the more specific cluster api to claim bits . */
2010-05-06 13:59:06 +08:00
status = ocfs2_claim_clusters ( handle , ac , osb - > local_alloc_bits ,
2005-12-15 14:31:24 -08:00
& cluster_off , & cluster_count ) ;
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
if ( status = = - ENOSPC ) {
retry_enospc :
/*
* Note : We could also try syncing the journal here to
* allow use of any free bits which the current
* transaction can ' t give us access to . - - Mark
*/
if ( ocfs2_recalc_la_window ( osb , OCFS2_LA_EVENT_FRAGMENTED ) = =
OCFS2_LA_DISABLED )
goto bail ;
2013-09-11 14:19:47 -07:00
ac - > ac_bits_wanted = osb - > local_alloc_bits ;
2010-05-06 13:59:06 +08:00
status = ocfs2_claim_clusters ( handle , ac ,
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
osb - > local_alloc_bits ,
& cluster_off ,
& cluster_count ) ;
if ( status = = - ENOSPC )
goto retry_enospc ;
/*
* We only shrunk the * minimum * number of in our
* request - it ' s entirely possible that the allocator
* might give us more than we asked for .
*/
if ( status = = 0 ) {
spin_lock ( & osb - > osb_lock ) ;
osb - > local_alloc_bits = cluster_count ;
spin_unlock ( & osb - > osb_lock ) ;
}
}
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
if ( status ! = - ENOSPC )
mlog_errno ( status ) ;
goto bail ;
}
2006-06-05 16:41:00 -04:00
osb - > la_last_gd = ac - > ac_last_group ;
2005-12-15 14:31:24 -08:00
la - > la_bm_off = cpu_to_le32 ( cluster_off ) ;
alloc - > id1 . bitmap1 . i_total = cpu_to_le32 ( cluster_count ) ;
/* just in case... In the future when we find space ourselves,
* we don ' t have to get all contiguous - - but we ' ll have to
* set all previously used bits in bitmap and update
* la_bits_set before setting the bits in the main bitmap . */
alloc - > id1 . bitmap1 . i_used = 0 ;
memset ( OCFS2_LOCAL_ALLOC ( alloc ) - > la_bitmap , 0 ,
le16_to_cpu ( la - > la_size ) ) ;
2009-12-07 13:10:48 -08:00
ocfs2_resmap_restart ( & osb - > osb_la_resmap , cluster_count ,
OCFS2_LOCAL_ALLOC ( alloc ) - > la_bitmap ) ;
2011-02-22 07:56:45 +08:00
trace_ocfs2_local_alloc_new_window_result (
OCFS2_LOCAL_ALLOC ( alloc ) - > la_bm_off ,
le32_to_cpu ( alloc - > id1 . bitmap1 . i_total ) ) ;
2005-12-15 14:31:24 -08:00
bail :
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}
/* Note that we do *NOT* lock the local alloc inode here as
* it ' s been locked already for us . */
static int ocfs2_local_alloc_slide_window ( struct ocfs2_super * osb ,
struct inode * local_alloc_inode )
{
int status = 0 ;
struct buffer_head * main_bm_bh = NULL ;
struct inode * main_bm_inode = NULL ;
2006-10-09 18:11:45 -07:00
handle_t * handle = NULL ;
2005-12-15 14:31:24 -08:00
struct ocfs2_dinode * alloc ;
struct ocfs2_dinode * alloc_copy = NULL ;
struct ocfs2_alloc_context * ac = NULL ;
ocfs2: throttle back local alloc when low on disk space
Ocfs2's local allocator disables itself for the duration of a mount point
when it has trouble allocating a large enough area from the primary bitmap.
That can cause performance problems, especially for disks which were only
temporarily full or fragmented. This patch allows for the allocator to
shrink it's window first, before being disabled. Later, it can also be
re-enabled so that any performance drop is minimized.
To do this, we allow the value of osb->local_alloc_bits to be shrunk when
needed. The default value is recorded in a mostly read-only variable so that
we can re-initialize when required.
Locking had to be updated so that we could protect changes to
local_alloc_bits. Mostly this involves protecting various local alloc values
with the osb spinlock. A new state is also added, OCFS2_LA_THROTTLED, which
is used when the local allocator is has shrunk, but is not disabled. If the
available space dips below 1 megabyte, the local alloc file is disabled. In
either case, local alloc is re-enabled 30 seconds after the event, or when
an appropriate amount of bits is seen in the primary bitmap.
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
2008-07-28 18:02:53 -07:00
ocfs2_recalc_la_window ( osb , OCFS2_LA_EVENT_SLIDE ) ;
2005-12-15 14:31:24 -08:00
/* This will lock the main bitmap for us. */
status = ocfs2_local_alloc_reserve_for_window ( osb ,
& ac ,
& main_bm_inode ,
& main_bm_bh ) ;
if ( status < 0 ) {
if ( status ! = - ENOSPC )
mlog_errno ( status ) ;
goto bail ;
}
2006-10-09 17:26:22 -07:00
handle = ocfs2_start_trans ( osb , OCFS2_WINDOW_MOVE_CREDITS ) ;
2005-12-15 14:31:24 -08:00
if ( IS_ERR ( handle ) ) {
status = PTR_ERR ( handle ) ;
handle = NULL ;
mlog_errno ( status ) ;
goto bail ;
}
alloc = ( struct ocfs2_dinode * ) osb - > local_alloc_bh - > b_data ;
/* We want to clear the local alloc before doing anything
* else , so that if we error later during this operation ,
* local alloc shutdown won ' t try to double free main bitmap
* bits . Make a copy so the sync function knows which bits to
* free . */
2019-07-11 20:53:19 -07:00
alloc_copy = kmemdup ( alloc , osb - > local_alloc_bh - > b_size , GFP_NOFS ) ;
2005-12-15 14:31:24 -08:00
if ( ! alloc_copy ) {
status = - ENOMEM ;
mlog_errno ( status ) ;
goto bail ;
}
2009-02-12 16:41:25 -08:00
status = ocfs2_journal_access_di ( handle ,
INODE_CACHE ( local_alloc_inode ) ,
2008-10-17 19:25:01 -07:00
osb - > local_alloc_bh ,
OCFS2_JOURNAL_ACCESS_WRITE ) ;
2005-12-15 14:31:24 -08:00
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
ocfs2_clear_local_alloc ( alloc ) ;
2010-03-19 14:13:52 -07:00
ocfs2_journal_dirty ( handle , osb - > local_alloc_bh ) ;
2005-12-15 14:31:24 -08:00
status = ocfs2_sync_local_to_main ( osb , handle , alloc_copy ,
main_bm_inode , main_bm_bh ) ;
if ( status < 0 ) {
mlog_errno ( status ) ;
goto bail ;
}
status = ocfs2_local_alloc_new_window ( osb , handle , ac ) ;
if ( status < 0 ) {
if ( status ! = - ENOSPC )
mlog_errno ( status ) ;
goto bail ;
}
atomic_inc ( & osb - > alloc_stats . moves ) ;
bail :
if ( handle )
2006-10-09 16:48:10 -07:00
ocfs2_commit_trans ( osb , handle ) ;
2005-12-15 14:31:24 -08:00
2008-10-07 14:25:16 -07:00
brelse ( main_bm_bh ) ;
2005-12-15 14:31:24 -08:00
2016-01-14 15:17:27 -08:00
iput ( main_bm_inode ) ;
2013-02-21 16:42:44 -08:00
kfree ( alloc_copy ) ;
2005-12-15 14:31:24 -08:00
if ( ac )
ocfs2_free_alloc_context ( ac ) ;
2011-03-07 16:43:21 +08:00
if ( status )
mlog_errno ( status ) ;
2005-12-15 14:31:24 -08:00
return status ;
}