diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 02b6e7af8edb..873bb99fc2ff 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -2957,6 +2957,210 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb) return status; } +/* + * Delayed de-allocation of suballocator blocks. + * + * Some sets of block de-allocations might involve multiple suballocator inodes. + * + * The locking for this can get extremely complicated, especially when + * the suballocator inodes to delete from aren't known until deep + * within an unrelated codepath. + * + * ocfs2_extent_block structures are a good example of this - an inode + * btree could have been grown by any number of nodes each allocating + * out of their own suballoc inode. + * + * These structures allow the delay of block de-allocation until a + * later time, when locking of multiple cluster inodes won't cause + * deadlock. + */ + +/* + * Describes a single block free from a suballocator + */ +struct ocfs2_cached_block_free { + struct ocfs2_cached_block_free *free_next; + u64 free_blk; + unsigned int free_bit; +}; + +struct ocfs2_per_slot_free_list { + struct ocfs2_per_slot_free_list *f_next_suballocator; + int f_inode_type; + int f_slot; + struct ocfs2_cached_block_free *f_first; +}; + +static int ocfs2_free_cached_items(struct ocfs2_super *osb, + int sysfile_type, + int slot, + struct ocfs2_cached_block_free *head) +{ + int ret; + u64 bg_blkno; + handle_t *handle; + struct inode *inode; + struct buffer_head *di_bh = NULL; + struct ocfs2_cached_block_free *tmp; + + inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot); + if (!inode) { + ret = -EINVAL; + mlog_errno(ret); + goto out; + } + + mutex_lock(&inode->i_mutex); + + ret = ocfs2_meta_lock(inode, &di_bh, 1); + if (ret) { + mlog_errno(ret); + goto out_mutex; + } + + handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + mlog_errno(ret); + goto out_unlock; + } + + while (head) { + bg_blkno = ocfs2_which_suballoc_group(head->free_blk, + head->free_bit); + mlog(0, "Free bit: (bit %u, blkno %llu)\n", + head->free_bit, (unsigned long long)head->free_blk); + + ret = ocfs2_free_suballoc_bits(handle, inode, di_bh, + head->free_bit, bg_blkno, 1); + if (ret) { + mlog_errno(ret); + goto out_journal; + } + + ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE); + if (ret) { + mlog_errno(ret); + goto out_journal; + } + + tmp = head; + head = head->free_next; + kfree(tmp); + } + +out_journal: + ocfs2_commit_trans(osb, handle); + +out_unlock: + ocfs2_meta_unlock(inode, 1); + brelse(di_bh); +out_mutex: + mutex_unlock(&inode->i_mutex); + iput(inode); +out: + while(head) { + /* Premature exit may have left some dangling items. */ + tmp = head; + head = head->free_next; + kfree(tmp); + } + + return ret; +} + +int ocfs2_run_deallocs(struct ocfs2_super *osb, + struct ocfs2_cached_dealloc_ctxt *ctxt) +{ + int ret = 0, ret2; + struct ocfs2_per_slot_free_list *fl; + + if (!ctxt) + return 0; + + while (ctxt->c_first_suballocator) { + fl = ctxt->c_first_suballocator; + + if (fl->f_first) { + mlog(0, "Free items: (type %u, slot %d)\n", + fl->f_inode_type, fl->f_slot); + ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type, + fl->f_slot, fl->f_first); + if (ret2) + mlog_errno(ret2); + if (!ret) + ret = ret2; + } + + ctxt->c_first_suballocator = fl->f_next_suballocator; + kfree(fl); + } + + return ret; +} + +static struct ocfs2_per_slot_free_list * +ocfs2_find_per_slot_free_list(int type, + int slot, + struct ocfs2_cached_dealloc_ctxt *ctxt) +{ + struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator; + + while (fl) { + if (fl->f_inode_type == type && fl->f_slot == slot) + return fl; + + fl = fl->f_next_suballocator; + } + + fl = kmalloc(sizeof(*fl), GFP_NOFS); + if (fl) { + fl->f_inode_type = type; + fl->f_slot = slot; + fl->f_first = NULL; + fl->f_next_suballocator = ctxt->c_first_suballocator; + + ctxt->c_first_suballocator = fl; + } + return fl; +} + +static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, + int type, int slot, u64 blkno, + unsigned int bit) +{ + int ret; + struct ocfs2_per_slot_free_list *fl; + struct ocfs2_cached_block_free *item; + + fl = ocfs2_find_per_slot_free_list(type, slot, ctxt); + if (fl == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + item = kmalloc(sizeof(*item), GFP_NOFS); + if (item == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", + type, slot, bit, (unsigned long long)blkno); + + item->free_blk = blkno; + item->free_bit = bit; + item->free_next = fl->f_first; + + fl->f_first = item; + + ret = 0; +out: + return ret; +} + /* This function will figure out whether the currently last extent * block will be deleted, and if it will, what the new last extent * block will be so we can update his h_next_leaf_blk field, as well diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index fbcb5934a081..01db0adc2150 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -63,6 +63,25 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, struct ocfs2_dinode *tl_copy); +/* + * Process local structure which describes the block unlinks done + * during an operation. This is populated via + * ocfs2_cache_block_dealloc(). + * + * ocfs2_run_deallocs() should be called after the potentially + * de-allocating routines. No journal handles should be open, and most + * locks should have been dropped. + */ +struct ocfs2_cached_dealloc_ctxt { + struct ocfs2_per_slot_free_list *c_first_suballocator; +}; +static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c) +{ + c->c_first_suballocator = NULL; +} +int ocfs2_run_deallocs(struct ocfs2_super *osb, + struct ocfs2_cached_dealloc_ctxt *ctxt); + struct ocfs2_truncate_context { struct inode *tc_ext_alloc_inode; struct buffer_head *tc_ext_alloc_bh; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index e3437626d183..6788f2f1a667 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -98,14 +98,6 @@ static int ocfs2_relink_block_group(handle_t *handle, u16 chain); static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, u32 wanted); -static int ocfs2_free_suballoc_bits(handle_t *handle, - struct inode *alloc_inode, - struct buffer_head *alloc_bh, - unsigned int start_bit, - u64 bg_blkno, - unsigned int count); -static inline u64 ocfs2_which_suballoc_group(u64 block, - unsigned int bit); static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, u64 bg_blkno, u16 bg_bit_off); @@ -1626,12 +1618,12 @@ bail: /* * expects the suballoc inode to already be locked. */ -static int ocfs2_free_suballoc_bits(handle_t *handle, - struct inode *alloc_inode, - struct buffer_head *alloc_bh, - unsigned int start_bit, - u64 bg_blkno, - unsigned int count) +int ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count) { int status = 0; u32 tmp_used; @@ -1703,13 +1695,6 @@ bail: return status; } -static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) -{ - u64 group = block - (u64) bit; - - return group; -} - int ocfs2_free_dinode(handle_t *handle, struct inode *inode_alloc_inode, struct buffer_head *inode_alloc_bh, diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 1a3c94cb9250..7bc4819db4db 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -86,6 +86,12 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, u32 *cluster_start, u32 *num_clusters); +int ocfs2_free_suballoc_bits(handle_t *handle, + struct inode *alloc_inode, + struct buffer_head *alloc_bh, + unsigned int start_bit, + u64 bg_blkno, + unsigned int count); int ocfs2_free_dinode(handle_t *handle, struct inode *inode_alloc_inode, struct buffer_head *inode_alloc_bh, @@ -100,6 +106,13 @@ int ocfs2_free_clusters(handle_t *handle, u64 start_blk, unsigned int num_clusters); +static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) +{ + u64 group = block - (u64) bit; + + return group; +} + static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb, u64 bg_blkno) {