e897be17a4
Patch series "nilfs2 lockdep warning fixes". The first two patches resolve the lockdep warning issue, and the last one is an accompanying, low-priority cleanup. Based on your comment, this series solves the issue by separating inode objects as needed. Since I was worried about the impact of the object composition changes, I tested the series carefully to make sure it does not cause regressions, especially in delicate functions such as disk space reclamation and snapshots. This patch (of 3): If CONFIG_LOCKDEP is enabled, nilfs2 hits lockdep warnings at inode_to_wb() during page/folio operations for btree nodes: WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 inode_to_wb include/linux/backing-dev.h:269 [inline] WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 folio_account_dirtied mm/page-writeback.c:2460 [inline] WARNING: CPU: 0 PID: 6575 at include/linux/backing-dev.h:269 __folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 Modules linked in: ... RIP: 0010:inode_to_wb include/linux/backing-dev.h:269 [inline] RIP: 0010:folio_account_dirtied mm/page-writeback.c:2460 [inline] RIP: 0010:__folio_mark_dirty+0xa7c/0xe30 mm/page-writeback.c:2509 ... 
Call Trace: __set_page_dirty include/linux/pagemap.h:834 [inline] mark_buffer_dirty+0x4e6/0x650 fs/buffer.c:1145 nilfs_btree_propagate_p fs/nilfs2/btree.c:1889 [inline] nilfs_btree_propagate+0x4ae/0xea0 fs/nilfs2/btree.c:2085 nilfs_bmap_propagate+0x73/0x170 fs/nilfs2/bmap.c:337 nilfs_collect_dat_data+0x45/0xd0 fs/nilfs2/segment.c:625 nilfs_segctor_apply_buffers+0x14a/0x470 fs/nilfs2/segment.c:1009 nilfs_segctor_scan_file+0x47a/0x700 fs/nilfs2/segment.c:1048 nilfs_segctor_collect_blocks fs/nilfs2/segment.c:1224 [inline] nilfs_segctor_collect fs/nilfs2/segment.c:1494 [inline] nilfs_segctor_do_construct+0x14f3/0x6c60 fs/nilfs2/segment.c:2036 nilfs_segctor_construct+0x7a7/0xb30 fs/nilfs2/segment.c:2372 nilfs_segctor_thread_construct fs/nilfs2/segment.c:2480 [inline] nilfs_segctor_thread+0x3c3/0xf90 fs/nilfs2/segment.c:2563 kthread+0x405/0x4f0 kernel/kthread.c:327 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 This is because nilfs2 uses two page caches for each inode and inode->i_mapping never points to one of them, the btree node cache. This causes inode_to_wb(inode) to refer to a different page cache than the one for which the calling page/folio operations, such as __folio_start_writeback(), __folio_end_writeback(), or __folio_mark_dirty(), acquired the lock. This patch resolves the issue by allocating and using an additional inode to hold the page cache of btree nodes. The inode is attached one-to-one to the traditional nilfs2 inode if it requires a block mapping with a b-tree. This setup change is in memory only and does not affect the disk format. 
Link: https://lkml.kernel.org/r/1647867427-30498-1-git-send-email-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/1647867427-30498-2-git-send-email-konishi.ryusuke@gmail.com Link: https://lore.kernel.org/r/YXrYvIo8YRnAOJCj@casper.infradead.org Link: https://lore.kernel.org/r/9a20b33d-b38f-b4a2-4742-c1eb5b8e4d6c@redhat.com Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Reported-by: syzbot+0d5b462a6f07447991b3@syzkaller.appspotmail.com Reported-by: syzbot+34ef28bb2aeb28724aa0@syzkaller.appspotmail.com Reported-by: Hao Sun <sunhao.th@gmail.com> Reported-by: David Hildenbrand <david@redhat.com> Tested-by: Ryusuke Konishi <konishi.ryusuke@gmail.com> Cc: Matthew Wilcox <willy@infradead.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
193 lines
5.2 KiB
C
193 lines
5.2 KiB
C
// SPDX-License-Identifier: GPL-2.0+
|
|
/*
|
|
* Dummy inodes to buffer blocks for garbage collection
|
|
*
|
|
* Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
|
|
*
|
|
* Written by Seiji Kihara, Amagai Yoshiji, and Ryusuke Konishi.
|
|
* Revised by Ryusuke Konishi.
|
|
*
|
|
*/
|
|
/*
|
|
* This file adds the cache of on-disk blocks to be moved in garbage
|
|
* collection. The disk blocks are held with dummy inodes (called
|
|
* gcinodes), and this file provides lookup function of the dummy
|
|
* inodes and their buffer read function.
|
|
*
|
|
* Buffers and pages held by the dummy inodes will be released each
|
|
* time after they are copied to a new log. Dirty blocks made on the
|
|
* current generation and the blocks to be moved by GC never overlap
|
|
* because the dirty blocks make a new generation; they rather must be
|
|
* written individually.
|
|
*/
|
|
|
|
#include <linux/buffer_head.h>
|
|
#include <linux/mpage.h>
|
|
#include <linux/hash.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/swap.h>
|
|
#include "nilfs.h"
|
|
#include "btree.h"
|
|
#include "btnode.h"
|
|
#include "page.h"
|
|
#include "mdt.h"
|
|
#include "dat.h"
|
|
#include "ifile.h"
|
|
|
|
/*
|
|
* nilfs_gccache_submit_read_data() - add data buffer and submit read request
|
|
* @inode - gc inode
|
|
* @blkoff - dummy offset treated as the key for the page cache
|
|
* @pbn - physical block number of the block
|
|
* @vbn - virtual block number of the block, 0 for non-virtual block
|
|
* @out_bh - indirect pointer to a buffer_head struct to receive the results
|
|
*
|
|
* Description: nilfs_gccache_submit_read_data() registers the data buffer
|
|
* specified by @pbn to the GC pagecache with the key @blkoff.
|
|
* This function sets @vbn (@pbn if @vbn is zero) in b_blocknr of the buffer.
|
|
*
|
|
* Return Value: On success, 0 is returned. On Error, one of the following
|
|
* negative error code is returned.
|
|
*
|
|
* %-EIO - I/O error.
|
|
*
|
|
* %-ENOMEM - Insufficient amount of memory available.
|
|
*
|
|
* %-ENOENT - The block specified with @pbn does not exist.
|
|
*/
|
|
int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
|
|
sector_t pbn, __u64 vbn,
|
|
struct buffer_head **out_bh)
|
|
{
|
|
struct buffer_head *bh;
|
|
int err;
|
|
|
|
bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
|
|
if (unlikely(!bh))
|
|
return -ENOMEM;
|
|
|
|
if (buffer_uptodate(bh))
|
|
goto out;
|
|
|
|
if (pbn == 0) {
|
|
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
|
|
|
|
err = nilfs_dat_translate(nilfs->ns_dat, vbn, &pbn);
|
|
if (unlikely(err)) { /* -EIO, -ENOMEM, -ENOENT */
|
|
brelse(bh);
|
|
goto failed;
|
|
}
|
|
}
|
|
|
|
lock_buffer(bh);
|
|
if (buffer_uptodate(bh)) {
|
|
unlock_buffer(bh);
|
|
goto out;
|
|
}
|
|
|
|
if (!buffer_mapped(bh)) {
|
|
bh->b_bdev = inode->i_sb->s_bdev;
|
|
set_buffer_mapped(bh);
|
|
}
|
|
bh->b_blocknr = pbn;
|
|
bh->b_end_io = end_buffer_read_sync;
|
|
get_bh(bh);
|
|
submit_bh(REQ_OP_READ, 0, bh);
|
|
if (vbn)
|
|
bh->b_blocknr = vbn;
|
|
out:
|
|
err = 0;
|
|
*out_bh = bh;
|
|
|
|
failed:
|
|
unlock_page(bh->b_page);
|
|
put_page(bh->b_page);
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* nilfs_gccache_submit_read_node() - add node buffer and submit read request
|
|
* @inode - gc inode
|
|
* @pbn - physical block number for the block
|
|
* @vbn - virtual block number for the block
|
|
* @out_bh - indirect pointer to a buffer_head struct to receive the results
|
|
*
|
|
* Description: nilfs_gccache_submit_read_node() registers the node buffer
|
|
* specified by @vbn to the GC pagecache. @pbn can be supplied by the
|
|
* caller to avoid translation of the disk block address.
|
|
*
|
|
* Return Value: On success, 0 is returned. On Error, one of the following
|
|
* negative error code is returned.
|
|
*
|
|
* %-EIO - I/O error.
|
|
*
|
|
* %-ENOMEM - Insufficient amount of memory available.
|
|
*/
|
|
int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn,
|
|
__u64 vbn, struct buffer_head **out_bh)
|
|
{
|
|
struct inode *btnc_inode = NILFS_I(inode)->i_assoc_inode;
|
|
int ret;
|
|
|
|
ret = nilfs_btnode_submit_block(btnc_inode->i_mapping,
|
|
vbn ? : pbn, pbn, REQ_OP_READ, 0,
|
|
out_bh, &pbn);
|
|
if (ret == -EEXIST) /* internal code (cache hit) */
|
|
ret = 0;
|
|
return ret;
|
|
}
|
|
|
|
int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
|
|
{
|
|
wait_on_buffer(bh);
|
|
if (!buffer_uptodate(bh)) {
|
|
struct inode *inode = bh->b_page->mapping->host;
|
|
|
|
nilfs_err(inode->i_sb,
|
|
"I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)",
|
|
buffer_nilfs_node(bh) ? "node" : "data",
|
|
inode->i_ino, (unsigned long long)bh->b_blocknr);
|
|
return -EIO;
|
|
}
|
|
if (buffer_dirty(bh))
|
|
return -EEXIST;
|
|
|
|
if (buffer_nilfs_node(bh) && nilfs_btree_broken_node_block(bh)) {
|
|
clear_buffer_uptodate(bh);
|
|
return -EIO;
|
|
}
|
|
mark_buffer_dirty(bh);
|
|
return 0;
|
|
}
|
|
|
|
int nilfs_init_gcinode(struct inode *inode)
|
|
{
|
|
struct nilfs_inode_info *ii = NILFS_I(inode);
|
|
|
|
inode->i_mode = S_IFREG;
|
|
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
|
|
inode->i_mapping->a_ops = &empty_aops;
|
|
|
|
ii->i_flags = 0;
|
|
nilfs_bmap_init_gc(ii->i_bmap);
|
|
|
|
return nilfs_attach_btree_node_cache(inode);
|
|
}
|
|
|
|
/**
|
|
* nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes
|
|
*/
|
|
void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs)
|
|
{
|
|
struct list_head *head = &nilfs->ns_gc_inodes;
|
|
struct nilfs_inode_info *ii;
|
|
|
|
while (!list_empty(head)) {
|
|
ii = list_first_entry(head, struct nilfs_inode_info, i_dirty);
|
|
list_del_init(&ii->i_dirty);
|
|
truncate_inode_pages(&ii->vfs_inode.i_data, 0);
|
|
nilfs_btnode_cache_clear(ii->i_assoc_inode->i_mapping);
|
|
iput(&ii->vfs_inode);
|
|
}
|
|
}
|