Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (40 commits)
  ext4: Adding error check after calling ext4_mb_regular_allocator()
  ext4: Fix dirtying of journalled buffers in data=journal mode
  ext4: re-inline ext4_rec_len_(to|from)_disk functions
  jbd2: Remove t_handle_lock from start_this_handle()
  jbd2: Change j_state_lock to be a rwlock_t
  jbd2: Use atomic variables to avoid taking t_handle_lock in jbd2_journal_stop
  ext4: Add mount options in superblock
  ext4: force block allocation on quota_off
  ext4: fix freeze deadlock under IO
  ext4: drop inode from orphan list if ext4_delete_inode() fails
  ext4: check to make sure bd_dev is set before dereferencing it
  jbd2: Make barrier messages less scary
  ext4: don't print scary messages for allocation failures post-abort
  ext4: fix EFBIG edge case when writing to large non-extent file
  ext4: fix ext4_get_blocks references
  ext4: Always journal quota file modifications
  ext4: Fix potential memory leak in ext4_fill_super
  ext4: Don't error out the fs if the user tries to make a file too big
  ext4: allocate stripe-multiple IOs on stripe boundaries
  ext4: move aio completion after unwritten extent conversion
  ...

Fix up conflicts in fs/ext4/inode.c as per Ted.

Fix up xfs conflicts as per earlier xfs merge.
This commit is contained in:
Linus Torvalds 2010-08-07 13:03:53 -07:00
commit 09dc942c2a
26 changed files with 917 additions and 654 deletions

View File

@ -204,6 +204,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
return error; return error;
else { else {
inode->i_mode = mode; inode->i_mode = mode;
inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
if (error == 0) if (error == 0)
acl = NULL; acl = NULL;

View File

@ -377,14 +377,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_grpblk_t bit; ext4_grpblk_t bit;
unsigned int i; unsigned int i;
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_sb_info *sbi;
int err = 0, ret, blk_free_count; int err = 0, ret, blk_free_count;
ext4_grpblk_t blocks_freed; ext4_grpblk_t blocks_freed;
struct ext4_group_info *grp; struct ext4_group_info *grp;
sbi = EXT4_SB(sb);
es = sbi->s_es;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
ext4_get_group_no_and_offset(sb, block, &block_group, &bit); ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@ -477,7 +474,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh); ret = ext4_handle_dirty_metadata(handle, NULL, gd_bh);
if (!err) if (!err)
err = ret; err = ret;
sb->s_dirt = 1;
error_return: error_return:
brelse(bitmap_bh); brelse(bitmap_bh);

View File

@ -229,16 +229,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count < start_blk) || (start_blk + count < start_blk) ||
(start_blk + count > ext4_blocks_count(sbi->s_es))) (start_blk + count > ext4_blocks_count(sbi->s_es))) {
sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0; return 0;
}
while (n) { while (n) {
entry = rb_entry(n, struct ext4_system_zone, node); entry = rb_entry(n, struct ext4_system_zone, node);
if (start_blk + count - 1 < entry->start_blk) if (start_blk + count - 1 < entry->start_blk)
n = n->rb_left; n = n->rb_left;
else if (start_blk >= (entry->start_blk + entry->count)) else if (start_blk >= (entry->start_blk + entry->count))
n = n->rb_right; n = n->rb_right;
else else {
sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
return 0; return 0;
}
} }
return 1; return 1;
} }

View File

@ -61,10 +61,11 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
} }
int ext4_check_dir_entry(const char *function, struct inode *dir, int __ext4_check_dir_entry(const char *function, unsigned int line,
struct ext4_dir_entry_2 *de, struct inode *dir,
struct buffer_head *bh, struct ext4_dir_entry_2 *de,
unsigned int offset) struct buffer_head *bh,
unsigned int offset)
{ {
const char *error_msg = NULL; const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len, const int rlen = ext4_rec_len_from_disk(de->rec_len,
@ -83,11 +84,10 @@ int ext4_check_dir_entry(const char *function, struct inode *dir,
error_msg = "inode out of bounds"; error_msg = "inode out of bounds";
if (error_msg != NULL) if (error_msg != NULL)
ext4_error_inode(function, dir, ext4_error_inode(dir, function, line, bh->b_blocknr,
"bad entry in directory: %s - block=%llu" "bad entry in directory: %s - "
"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", "offset=%u(%u), inode=%u, rec_len=%d, name_len=%d",
error_msg, (unsigned long long) bh->b_blocknr, error_msg, (unsigned) (offset%bh->b_size), offset,
(unsigned) (offset%bh->b_size), offset,
le32_to_cpu(de->inode), le32_to_cpu(de->inode),
rlen, de->name_len); rlen, de->name_len);
return error_msg == NULL ? 1 : 0; return error_msg == NULL ? 1 : 0;
@ -121,7 +121,8 @@ static int ext4_readdir(struct file *filp,
* We don't set the inode dirty flag since it's not * We don't set the inode dirty flag since it's not
* critical that it get flushed back to the disk. * critical that it get flushed back to the disk.
*/ */
ext4_clear_inode_flag(filp->f_path.dentry->d_inode, EXT4_INODE_INDEX); ext4_clear_inode_flag(filp->f_path.dentry->d_inode,
EXT4_INODE_INDEX);
} }
stored = 0; stored = 0;
offset = filp->f_pos & (sb->s_blocksize - 1); offset = filp->f_pos & (sb->s_blocksize - 1);
@ -193,7 +194,7 @@ revalidate:
while (!error && filp->f_pos < inode->i_size while (!error && filp->f_pos < inode->i_size
&& offset < sb->s_blocksize) { && offset < sb->s_blocksize) {
de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
if (!ext4_check_dir_entry("ext4_readdir", inode, de, if (!ext4_check_dir_entry(inode, de,
bh, offset)) { bh, offset)) {
/* /*
* On error, skip the f_pos to the next block * On error, skip the f_pos to the next block
@ -343,7 +344,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
struct dir_private_info *info; struct dir_private_info *info;
int len; int len;
info = (struct dir_private_info *) dir_file->private_data; info = dir_file->private_data;
p = &info->root.rb_node; p = &info->root.rb_node;
/* Create and allocate the fname structure */ /* Create and allocate the fname structure */

View File

@ -57,10 +57,13 @@
#endif #endif
#define EXT4_ERROR_INODE(inode, fmt, a...) \ #define EXT4_ERROR_INODE(inode, fmt, a...) \
ext4_error_inode(__func__, (inode), (fmt), ## a) ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
/*
 * Like EXT4_ERROR_INODE(), but passes an explicit block number to
 * ext4_error_inode() instead of 0.  The calling function name and
 * line number are filled in from __func__ and __LINE__.
 */
#define EXT4_ERROR_INODE_BLOCK(inode, block, fmt, a...) \
ext4_error_inode((inode), __func__, __LINE__, (block), (fmt), ## a)
#define EXT4_ERROR_FILE(file, fmt, a...) \ #define EXT4_ERROR_FILE(file, fmt, a...) \
ext4_error_file(__func__, (file), (fmt), ## a) ext4_error_file(__func__, __LINE__, (file), (fmt), ## a)
/* data type for block offset of block group */ /* data type for block offset of block group */
typedef int ext4_grpblk_t; typedef int ext4_grpblk_t;
@ -167,13 +170,15 @@ struct mpage_da_data {
}; };
#define EXT4_IO_UNWRITTEN 0x1 #define EXT4_IO_UNWRITTEN 0x1
typedef struct ext4_io_end { typedef struct ext4_io_end {
struct list_head list; /* per-file finished AIO list */ struct list_head list; /* per-file finished IO list */
struct inode *inode; /* file being written to */ struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */ unsigned int flag; /* unwritten or not */
struct page *page; /* page struct for buffer write */ struct page *page; /* page struct for buffer write */
loff_t offset; /* offset in the file */ loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */ ssize_t size; /* size of the extent */
struct work_struct work; /* data work queue */ struct work_struct work; /* data work queue */
struct kiocb *iocb; /* iocb struct for AIO */
int result; /* error value for AIO */
} ext4_io_end_t; } ext4_io_end_t;
/* /*
@ -460,7 +465,7 @@ struct ext4_new_group_data {
}; };
/* /*
* Flags used by ext4_get_blocks() * Flags used by ext4_map_blocks()
*/ */
/* Allocate any needed blocks and/or convert an uninitialized /* Allocate any needed blocks and/or convert an uninitialized
extent to be an initialized ext4 */ extent to be an initialized ext4 */
@ -873,7 +878,6 @@ struct ext4_inode_info {
#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
#define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */ #define EXT4_MOUNT_NO_AUTO_DA_ALLOC 0x10000 /* No auto delalloc mapping */
#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
@ -982,7 +986,7 @@ struct ext4_super_block {
__le32 s_last_orphan; /* start of list of inodes to delete */ __le32 s_last_orphan; /* start of list of inodes to delete */
__le32 s_hash_seed[4]; /* HTREE hash seed */ __le32 s_hash_seed[4]; /* HTREE hash seed */
__u8 s_def_hash_version; /* Default hash version to use */ __u8 s_def_hash_version; /* Default hash version to use */
__u8 s_reserved_char_pad; __u8 s_jnl_backup_type;
__le16 s_desc_size; /* size of group descriptor */ __le16 s_desc_size; /* size of group descriptor */
/*100*/ __le32 s_default_mount_opts; /*100*/ __le32 s_default_mount_opts;
__le32 s_first_meta_bg; /* First metablock block group */ __le32 s_first_meta_bg; /* First metablock block group */
@ -1000,12 +1004,34 @@ struct ext4_super_block {
__le64 s_mmp_block; /* Block for multi-mount protection */ __le64 s_mmp_block; /* Block for multi-mount protection */
__le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
__u8 s_log_groups_per_flex; /* FLEX_BG group size */ __u8 s_log_groups_per_flex; /* FLEX_BG group size */
__u8 s_reserved_char_pad2; __u8 s_reserved_char_pad;
__le16 s_reserved_pad; __le16 s_reserved_pad;
__le64 s_kbytes_written; /* nr of lifetime kilobytes written */ __le64 s_kbytes_written; /* nr of lifetime kilobytes written */
__u32 s_reserved[160]; /* Padding to the end of the block */ __le32 s_snapshot_inum; /* Inode number of active snapshot */
__le32 s_snapshot_id; /* sequential ID of active snapshot */
__le64 s_snapshot_r_blocks_count; /* reserved blocks for active
snapshot's future use */
__le32 s_snapshot_list; /* inode number of the head of the
on-disk snapshot list */
#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
__le32 s_error_count; /* number of fs errors */
__le32 s_first_error_time; /* first time an error happened */
__le32 s_first_error_ino; /* inode involved in first error */
__le64 s_first_error_block; /* block involved of first error */
__u8 s_first_error_func[32]; /* function where the error happened */
__le32 s_first_error_line; /* line number where error happened */
__le32 s_last_error_time; /* most recent time of an error */
__le32 s_last_error_ino; /* inode involved in last error */
__le32 s_last_error_line; /* line number where error happened */
__le64 s_last_error_block; /* block involved of last error */
__u8 s_last_error_func[32]; /* function where the error happened */
#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_mount_opts)
__u8 s_mount_opts[64];
__le32 s_reserved[112]; /* Padding to the end of the block */
}; };
#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
#ifdef __KERNEL__ #ifdef __KERNEL__
/* /*
@ -1143,6 +1169,9 @@ struct ext4_sb_info {
/* workqueue for dio unwritten */ /* workqueue for dio unwritten */
struct workqueue_struct *dio_unwritten_wq; struct workqueue_struct *dio_unwritten_wq;
/* timer for periodic error stats printing */
struct timer_list s_err_report;
}; };
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@ -1313,6 +1342,10 @@ EXT4_INODE_BIT_FNS(state, state_flags)
#define EXT4_DEFM_JMODE_DATA 0x0020 #define EXT4_DEFM_JMODE_DATA 0x0020
#define EXT4_DEFM_JMODE_ORDERED 0x0040 #define EXT4_DEFM_JMODE_ORDERED 0x0040
#define EXT4_DEFM_JMODE_WBACK 0x0060 #define EXT4_DEFM_JMODE_WBACK 0x0060
#define EXT4_DEFM_NOBARRIER 0x0100
#define EXT4_DEFM_BLOCK_VALIDITY 0x0200
#define EXT4_DEFM_DISCARD 0x0400
#define EXT4_DEFM_NODELALLOC 0x0800
/* /*
* Default journal batch times * Default journal batch times
@ -1378,6 +1411,43 @@ struct ext4_dir_entry_2 {
~EXT4_DIR_ROUND) ~EXT4_DIR_ROUND)
#define EXT4_MAX_REC_LEN ((1<<16)-1) #define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
 * Decode a directory entry record length from its on-disk form.
 *
 * If we ever get support for fs block sizes > page_size, we'll need
 * to remove the #if statements in this function and in
 * ext4_rec_len_to_disk().
 */
static inline unsigned int
ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
{
	unsigned int raw = le16_to_cpu(dlen);

#if (PAGE_CACHE_SIZE >= 65536)
	/* Both 0 and EXT4_MAX_REC_LEN decode to the full block size. */
	if (raw == 0 || raw == EXT4_MAX_REC_LEN)
		return blocksize;
	/* The two low bits hold bits 16-17 of the decoded length. */
	return ((raw & 3) << 16) | (raw & 65532);
#else
	return raw;
#endif
}
/*
 * Encode a directory entry record length into its on-disk form.
 *
 * BUG()s if @len is not a multiple of 4, exceeds @blocksize, or if
 * @blocksize is larger than 1 << 18.
 */
static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
{
	if ((len & 3) || (len > blocksize) || (blocksize > (1 << 18)))
		BUG();

#if (PAGE_CACHE_SIZE >= 65536)
	/* Lengths that fit in 16 bits are stored unchanged. */
	if (len < 65536)
		return cpu_to_le16(len);

	/*
	 * An entry covering the whole block is stored as EXT4_MAX_REC_LEN
	 * for 64KiB blocks and as 0 for larger blocks.
	 */
	if (len == blocksize)
		return cpu_to_le16(blocksize == 65536 ? EXT4_MAX_REC_LEN : 0);

	/* Otherwise fold bits 16-17 of the length into the two low bits. */
	return cpu_to_le16(((len >> 16) & 3) | (len & 65532));
#else
	return cpu_to_le16(len);
#endif
}
/* /*
* Hash Tree Directory indexing * Hash Tree Directory indexing
* (c) Daniel Phillips, 2001 * (c) Daniel Phillips, 2001
@ -1510,9 +1580,11 @@ extern unsigned ext4_init_block_bitmap(struct super_block *sb,
ext4_init_block_bitmap(sb, NULL, group, desc) ext4_init_block_bitmap(sb, NULL, group, desc)
/* dir.c */ /* dir.c */
extern int ext4_check_dir_entry(const char *, struct inode *, extern int __ext4_check_dir_entry(const char *, unsigned int, struct inode *,
struct ext4_dir_entry_2 *, struct ext4_dir_entry_2 *,
struct buffer_head *, unsigned int); struct buffer_head *, unsigned int);
/*
 * Wrapper around __ext4_check_dir_entry() that supplies the calling
 * function's name (__func__) and line number (__LINE__) automatically.
 */
#define ext4_check_dir_entry(dir, de, bh, offset) \
__ext4_check_dir_entry(__func__, __LINE__, (dir), (de), (bh), (offset))
extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
__u32 minor_hash, __u32 minor_hash,
struct ext4_dir_entry_2 *dirent); struct ext4_dir_entry_2 *dirent);
@ -1601,8 +1673,6 @@ extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
extern int ext4_ext_migrate(struct inode *); extern int ext4_ext_migrate(struct inode *);
/* namei.c */ /* namei.c */
extern unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize);
extern __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize);
extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *);
extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
@ -1616,25 +1686,38 @@ extern int ext4_group_extend(struct super_block *sb,
ext4_fsblk_t n_blocks_count); ext4_fsblk_t n_blocks_count);
/* super.c */ /* super.c */
extern void __ext4_error(struct super_block *, const char *, const char *, ...) extern void __ext4_error(struct super_block *, const char *, unsigned int,
__attribute__ ((format (printf, 3, 4))); const char *, ...)
#define ext4_error(sb, message...) __ext4_error(sb, __func__, ## message) __attribute__ ((format (printf, 4, 5)));
extern void ext4_error_inode(const char *, struct inode *, const char *, ...) #define ext4_error(sb, message...) __ext4_error(sb, __func__, \
__attribute__ ((format (printf, 3, 4))); __LINE__, ## message)
extern void ext4_error_file(const char *, struct file *, const char *, ...) extern void ext4_error_inode(struct inode *, const char *, unsigned int,
__attribute__ ((format (printf, 3, 4))); ext4_fsblk_t, const char *, ...)
extern void __ext4_std_error(struct super_block *, const char *, int); __attribute__ ((format (printf, 5, 6)));
extern void ext4_abort(struct super_block *, const char *, const char *, ...) extern void ext4_error_file(struct file *, const char *, unsigned int,
__attribute__ ((format (printf, 3, 4))); const char *, ...)
extern void __ext4_warning(struct super_block *, const char *, __attribute__ ((format (printf, 4, 5)));
extern void __ext4_std_error(struct super_block *, const char *,
unsigned int, int);
extern void __ext4_abort(struct super_block *, const char *, unsigned int,
const char *, ...)
__attribute__ ((format (printf, 4, 5)));
#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \
__LINE__, ## message)
extern void __ext4_warning(struct super_block *, const char *, unsigned int,
const char *, ...) const char *, ...)
__attribute__ ((format (printf, 3, 4))); __attribute__ ((format (printf, 4, 5)));
#define ext4_warning(sb, message...) __ext4_warning(sb, __func__, ## message) #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \
__LINE__, ## message)
extern void ext4_msg(struct super_block *, const char *, const char *, ...) extern void ext4_msg(struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4))); __attribute__ ((format (printf, 3, 4)));
extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, extern void __ext4_grp_locked_error(const char *, unsigned int, \
const char *, const char *, ...) struct super_block *, ext4_group_t, \
__attribute__ ((format (printf, 4, 5))); unsigned long, ext4_fsblk_t, \
const char *, ...)
__attribute__ ((format (printf, 7, 8)));
#define ext4_grp_locked_error(sb, grp, message...) \
__ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message)
extern void ext4_update_dynamic_rev(struct super_block *sb); extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat); __u32 compat);
@ -1768,7 +1851,7 @@ static inline unsigned int ext4_flex_bg_size(struct ext4_sb_info *sbi)
#define ext4_std_error(sb, errno) \ #define ext4_std_error(sb, errno) \
do { \ do { \
if ((errno)) \ if ((errno)) \
__ext4_std_error((sb), __func__, (errno)); \ __ext4_std_error((sb), __func__, __LINE__, (errno)); \
} while (0) } while (0)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
@ -1860,6 +1943,12 @@ static inline void ext4_unlock_group(struct super_block *sb,
spin_unlock(ext4_group_lock_ptr(sb, group)); spin_unlock(ext4_group_lock_ptr(sb, group));
} }
/*
 * Set sb->s_dirt, but only when EXT4_SB(sb)->s_journal is NULL.
 * When a journal pointer is present this function does nothing.
 */
static inline void ext4_mark_super_dirty(struct super_block *sb)
{
	if (EXT4_SB(sb)->s_journal)
		return;

	sb->s_dirt = 1;
}
/* /*
* Inodes and files operations * Inodes and files operations
*/ */
@ -1905,9 +1994,6 @@ extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len); ssize_t len);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags); struct ext4_map_blocks *map, int flags);
extern int ext4_get_blocks(handle_t *handle, struct inode *inode,
sector_t block, unsigned int max_blocks,
struct buffer_head *bh, int flags);
extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len); __u64 start, __u64 len);
/* move_extent.c */ /* move_extent.c */

View File

@ -6,29 +6,29 @@
#include <trace/events/ext4.h> #include <trace/events/ext4.h>
int __ext4_journal_get_undo_access(const char *where, handle_t *handle, int __ext4_journal_get_undo_access(const char *where, unsigned int line,
struct buffer_head *bh) handle_t *handle, struct buffer_head *bh)
{ {
int err = 0; int err = 0;
if (ext4_handle_valid(handle)) { if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_undo_access(handle, bh); err = jbd2_journal_get_undo_access(handle, bh);
if (err) if (err)
ext4_journal_abort_handle(where, __func__, bh, ext4_journal_abort_handle(where, line, __func__, bh,
handle, err); handle, err);
} }
return err; return err;
} }
int __ext4_journal_get_write_access(const char *where, handle_t *handle, int __ext4_journal_get_write_access(const char *where, unsigned int line,
struct buffer_head *bh) handle_t *handle, struct buffer_head *bh)
{ {
int err = 0; int err = 0;
if (ext4_handle_valid(handle)) { if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh); err = jbd2_journal_get_write_access(handle, bh);
if (err) if (err)
ext4_journal_abort_handle(where, __func__, bh, ext4_journal_abort_handle(where, line, __func__, bh,
handle, err); handle, err);
} }
return err; return err;
@ -46,9 +46,9 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
* If the handle isn't valid we're not journaling, but we still need to * If the handle isn't valid we're not journaling, but we still need to
* call into ext4_journal_revoke() to put the buffer head. * call into ext4_journal_revoke() to put the buffer head.
*/ */
int __ext4_forget(const char *where, handle_t *handle, int is_metadata, int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
struct inode *inode, struct buffer_head *bh, int is_metadata, struct inode *inode,
ext4_fsblk_t blocknr) struct buffer_head *bh, ext4_fsblk_t blocknr)
{ {
int err; int err;
@ -79,8 +79,8 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
BUFFER_TRACE(bh, "call jbd2_journal_forget"); BUFFER_TRACE(bh, "call jbd2_journal_forget");
err = jbd2_journal_forget(handle, bh); err = jbd2_journal_forget(handle, bh);
if (err) if (err)
ext4_journal_abort_handle(where, __func__, bh, ext4_journal_abort_handle(where, line, __func__,
handle, err); bh, handle, err);
return err; return err;
} }
return 0; return 0;
@ -92,15 +92,16 @@ int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
BUFFER_TRACE(bh, "call jbd2_journal_revoke"); BUFFER_TRACE(bh, "call jbd2_journal_revoke");
err = jbd2_journal_revoke(handle, blocknr, bh); err = jbd2_journal_revoke(handle, blocknr, bh);
if (err) { if (err) {
ext4_journal_abort_handle(where, __func__, bh, handle, err); ext4_journal_abort_handle(where, line, __func__,
ext4_abort(inode->i_sb, __func__, bh, handle, err);
__ext4_abort(inode->i_sb, where, line,
"error %d when attempting revoke", err); "error %d when attempting revoke", err);
} }
BUFFER_TRACE(bh, "exit"); BUFFER_TRACE(bh, "exit");
return err; return err;
} }
int __ext4_journal_get_create_access(const char *where, int __ext4_journal_get_create_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh) handle_t *handle, struct buffer_head *bh)
{ {
int err = 0; int err = 0;
@ -108,22 +109,23 @@ int __ext4_journal_get_create_access(const char *where,
if (ext4_handle_valid(handle)) { if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_create_access(handle, bh); err = jbd2_journal_get_create_access(handle, bh);
if (err) if (err)
ext4_journal_abort_handle(where, __func__, bh, ext4_journal_abort_handle(where, line, __func__,
handle, err); bh, handle, err);
} }
return err; return err;
} }
int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
struct inode *inode, struct buffer_head *bh) handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{ {
int err = 0; int err = 0;
if (ext4_handle_valid(handle)) { if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh); err = jbd2_journal_dirty_metadata(handle, bh);
if (err) if (err)
ext4_journal_abort_handle(where, __func__, bh, ext4_journal_abort_handle(where, line, __func__,
handle, err); bh, handle, err);
} else { } else {
if (inode) if (inode)
mark_buffer_dirty_inode(bh, inode); mark_buffer_dirty_inode(bh, inode);
@ -132,14 +134,33 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
if (inode && inode_needs_sync(inode)) { if (inode && inode_needs_sync(inode)) {
sync_dirty_buffer(bh); sync_dirty_buffer(bh);
if (buffer_req(bh) && !buffer_uptodate(bh)) { if (buffer_req(bh) && !buffer_uptodate(bh)) {
ext4_error(inode->i_sb, struct ext4_super_block *es;
"IO error syncing inode, "
"inode=%lu, block=%llu", es = EXT4_SB(inode->i_sb)->s_es;
inode->i_ino, es->s_last_error_block =
(unsigned long long) bh->b_blocknr); cpu_to_le64(bh->b_blocknr);
ext4_error_inode(inode, where, line,
bh->b_blocknr,
"IO error syncing itable block");
err = -EIO; err = -EIO;
} }
} }
} }
return err; return err;
} }
/*
 * Mark the superblock buffer (EXT4_SB(sb)->s_sbh) dirty.
 *
 * With a valid handle the buffer is passed to
 * jbd2_journal_dirty_metadata(); on failure the handle is aborted via
 * ext4_journal_abort_handle() and the error code is returned.  Without
 * a valid handle, sb->s_dirt is set instead and 0 is returned.
 *
 * @where and @line identify the call site and are forwarded to
 * ext4_journal_abort_handle().
 */
int __ext4_handle_dirty_super(const char *where, unsigned int line,
			      handle_t *handle, struct super_block *sb)
{
	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
	int rc;

	if (!ext4_handle_valid(handle)) {
		sb->s_dirt = 1;
		return 0;
	}

	rc = jbd2_journal_dirty_metadata(handle, sbh);
	if (rc)
		ext4_journal_abort_handle(where, line, __func__,
					  sbh, handle, rc);
	return rc;
}

View File

@ -122,39 +122,47 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
/* /*
* Wrapper functions with which ext4 calls into JBD. * Wrapper functions with which ext4 calls into JBD.
*/ */
void ext4_journal_abort_handle(const char *caller, const char *err_fn, void ext4_journal_abort_handle(const char *caller, unsigned int line,
const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err); struct buffer_head *bh, handle_t *handle, int err);
int __ext4_journal_get_undo_access(const char *where, handle_t *handle, int __ext4_journal_get_undo_access(const char *where, unsigned int line,
struct buffer_head *bh); handle_t *handle, struct buffer_head *bh);
int __ext4_journal_get_write_access(const char *where, handle_t *handle, int __ext4_journal_get_write_access(const char *where, unsigned int line,
struct buffer_head *bh); handle_t *handle, struct buffer_head *bh);
int __ext4_forget(const char *where, handle_t *handle, int is_metadata, int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
struct inode *inode, struct buffer_head *bh, int is_metadata, struct inode *inode,
ext4_fsblk_t blocknr); struct buffer_head *bh, ext4_fsblk_t blocknr);
int __ext4_journal_get_create_access(const char *where, int __ext4_journal_get_create_access(const char *where, unsigned int line,
handle_t *handle, struct buffer_head *bh); handle_t *handle, struct buffer_head *bh);
int __ext4_handle_dirty_metadata(const char *where, handle_t *handle, int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
struct inode *inode, struct buffer_head *bh); handle_t *handle, struct inode *inode,
struct buffer_head *bh);
int __ext4_handle_dirty_super(const char *where, unsigned int line,
handle_t *handle, struct super_block *sb);
#define ext4_journal_get_undo_access(handle, bh) \ #define ext4_journal_get_undo_access(handle, bh) \
__ext4_journal_get_undo_access(__func__, (handle), (bh)) __ext4_journal_get_undo_access(__func__, __LINE__, (handle), (bh))
#define ext4_journal_get_write_access(handle, bh) \ #define ext4_journal_get_write_access(handle, bh) \
__ext4_journal_get_write_access(__func__, (handle), (bh)) __ext4_journal_get_write_access(__func__, __LINE__, (handle), (bh))
#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \ #define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
__ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\ __ext4_forget(__func__, __LINE__, (handle), (is_metadata), (inode), \
(block_nr)) (bh), (block_nr))
#define ext4_journal_get_create_access(handle, bh) \ #define ext4_journal_get_create_access(handle, bh) \
__ext4_journal_get_create_access(__func__, (handle), (bh)) __ext4_journal_get_create_access(__func__, __LINE__, (handle), (bh))
#define ext4_handle_dirty_metadata(handle, inode, bh) \ #define ext4_handle_dirty_metadata(handle, inode, bh) \
__ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) __ext4_handle_dirty_metadata(__func__, __LINE__, (handle), (inode), \
(bh))
/*
 * Wrapper around __ext4_handle_dirty_super() that supplies the calling
 * function's name (__func__) and line number (__LINE__) automatically.
 */
#define ext4_handle_dirty_super(handle, sb) \
__ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb))
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
int __ext4_journal_stop(const char *where, handle_t *handle); int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle);
#define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096)
@ -207,7 +215,7 @@ static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
} }
#define ext4_journal_stop(handle) \ #define ext4_journal_stop(handle) \
__ext4_journal_stop(__func__, (handle)) __ext4_journal_stop(__func__, __LINE__, (handle))
static inline handle_t *ext4_journal_current_handle(void) static inline handle_t *ext4_journal_current_handle(void)
{ {
@ -308,17 +316,15 @@ static inline int ext4_should_writeback_data(struct inode *inode)
* This function controls whether or not we should try to go down the * This function controls whether or not we should try to go down the
* dioread_nolock code paths, which makes it safe to avoid taking * dioread_nolock code paths, which makes it safe to avoid taking
* i_mutex for direct I/O reads. This only works for extent-based * i_mutex for direct I/O reads. This only works for extent-based
* files, and it doesn't work for nobh or if data journaling is * files, and it doesn't work if data journaling is enabled, since the
* enabled, since the dioread_nolock code uses b_private to pass * dioread_nolock code uses b_private to pass information back to the
* information back to the I/O completion handler, and this conflicts * I/O completion handler, and this conflicts with the jbd's use of
* with the jbd's use of b_private. * b_private.
*/ */
static inline int ext4_should_dioread_nolock(struct inode *inode) static inline int ext4_should_dioread_nolock(struct inode *inode)
{ {
if (!test_opt(inode->i_sb, DIOREAD_NOLOCK)) if (!test_opt(inode->i_sb, DIOREAD_NOLOCK))
return 0; return 0;
if (test_opt(inode->i_sb, NOBH))
return 0;
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
return 0; return 0;
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))

View File

@ -401,9 +401,9 @@ static int ext4_valid_extent_entries(struct inode *inode,
return 1; return 1;
} }
static int __ext4_ext_check(const char *function, struct inode *inode, static int __ext4_ext_check(const char *function, unsigned int line,
struct ext4_extent_header *eh, struct inode *inode, struct ext4_extent_header *eh,
int depth) int depth)
{ {
const char *error_msg; const char *error_msg;
int max = 0; int max = 0;
@ -436,7 +436,7 @@ static int __ext4_ext_check(const char *function, struct inode *inode,
return 0; return 0;
corrupted: corrupted:
ext4_error_inode(function, inode, ext4_error_inode(inode, function, line, 0,
"bad header/extent: %s - magic %x, " "bad header/extent: %s - magic %x, "
"entries %u, max %u(%u), depth %u(%u)", "entries %u, max %u(%u), depth %u(%u)",
error_msg, le16_to_cpu(eh->eh_magic), error_msg, le16_to_cpu(eh->eh_magic),
@ -447,7 +447,7 @@ corrupted:
} }
#define ext4_ext_check(inode, eh, depth) \ #define ext4_ext_check(inode, eh, depth) \
__ext4_ext_check(__func__, inode, eh, depth) __ext4_ext_check(__func__, __LINE__, inode, eh, depth)
int ext4_ext_check_inode(struct inode *inode) int ext4_ext_check_inode(struct inode *inode)
{ {
@ -1083,7 +1083,6 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
{ {
struct ext4_ext_path *curp = path; struct ext4_ext_path *curp = path;
struct ext4_extent_header *neh; struct ext4_extent_header *neh;
struct ext4_extent_idx *fidx;
struct buffer_head *bh; struct buffer_head *bh;
ext4_fsblk_t newblock; ext4_fsblk_t newblock;
int err = 0; int err = 0;
@ -1144,10 +1143,10 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
ext4_idx_store_pblock(curp->p_idx, newblock); ext4_idx_store_pblock(curp->p_idx, newblock);
neh = ext_inode_hdr(inode); neh = ext_inode_hdr(inode);
fidx = EXT_FIRST_INDEX(neh);
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
idx_pblock(EXT_FIRST_INDEX(neh)));
neh->eh_depth = cpu_to_le16(path->p_depth + 1); neh->eh_depth = cpu_to_le16(path->p_depth + 1);
err = ext4_ext_dirty(handle, inode, curp); err = ext4_ext_dirty(handle, inode, curp);
@ -2954,7 +2953,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
struct ext4_extent *ex1 = NULL; struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL; struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL; struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh;
ext4_lblk_t ee_block, eof_block; ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth; unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock; ext4_fsblk_t newblock;
@ -2971,7 +2969,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
eof_block = map->m_lblk + map->m_len; eof_block = map->m_lblk + map->m_len;
depth = ext_depth(inode); depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext; ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block); ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex); ee_len = ext4_ext_get_actual_len(ex);
@ -3058,7 +3055,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
err = PTR_ERR(path); err = PTR_ERR(path);
goto out; goto out;
} }
eh = path[depth].p_hdr;
ex = path[depth].p_ext; ex = path[depth].p_ext;
if (ex2 != &newex) if (ex2 != &newex)
ex2 = ex; ex2 = ex;

View File

@ -70,7 +70,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
size_t length = iov_length(iov, nr_segs); size_t length = iov_length(iov, nr_segs);
if (pos > sbi->s_bitmap_maxbytes) if ((pos > sbi->s_bitmap_maxbytes ||
(pos == sbi->s_bitmap_maxbytes && length > 0)))
return -EFBIG; return -EFBIG;
if (pos + length > sbi->s_bitmap_maxbytes) { if (pos + length > sbi->s_bitmap_maxbytes) {
@ -123,7 +124,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (!IS_ERR(cp)) { if (!IS_ERR(cp)) {
memcpy(sbi->s_es->s_last_mounted, cp, memcpy(sbi->s_es->s_last_mounted, cp,
sizeof(sbi->s_es->s_last_mounted)); sizeof(sbi->s_es->s_last_mounted));
sb->s_dirt = 1; ext4_mark_super_dirty(sb);
} }
} }
return dquot_file_open(inode, filp); return dquot_file_open(inode, filp);

View File

@ -279,7 +279,7 @@ out:
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!fatal) if (!fatal)
fatal = err; fatal = err;
sb->s_dirt = 1; ext4_mark_super_dirty(sb);
} else } else
ext4_error(sb, "bit already cleared for inode %lu", ino); ext4_error(sb, "bit already cleared for inode %lu", ino);
@ -965,7 +965,7 @@ got:
percpu_counter_dec(&sbi->s_freeinodes_counter); percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode)) if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter); percpu_counter_inc(&sbi->s_dirs_counter);
sb->s_dirt = 1; ext4_mark_super_dirty(sb);
if (sbi->s_log_groups_per_flex) { if (sbi->s_log_groups_per_flex) {
flex_group = ext4_flex_group(sbi, group); flex_group = ext4_flex_group(sbi, group);

View File

@ -221,6 +221,7 @@ void ext4_delete_inode(struct inode *inode)
"couldn't extend journal (err %d)", err); "couldn't extend journal (err %d)", err);
stop_handle: stop_handle:
ext4_journal_stop(handle); ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
goto no_delete; goto no_delete;
} }
} }
@ -337,9 +338,11 @@ static int ext4_block_to_path(struct inode *inode,
return n; return n;
} }
static int __ext4_check_blockref(const char *function, struct inode *inode, static int __ext4_check_blockref(const char *function, unsigned int line,
struct inode *inode,
__le32 *p, unsigned int max) __le32 *p, unsigned int max)
{ {
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
__le32 *bref = p; __le32 *bref = p;
unsigned int blk; unsigned int blk;
@ -348,8 +351,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
if (blk && if (blk &&
unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
blk, 1))) { blk, 1))) {
ext4_error_inode(function, inode, es->s_last_error_block = cpu_to_le64(blk);
"invalid block reference %u", blk); ext4_error_inode(inode, function, line, blk,
"invalid block");
return -EIO; return -EIO;
} }
} }
@ -358,11 +362,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
#define ext4_check_indirect_blockref(inode, bh) \ #define ext4_check_indirect_blockref(inode, bh) \
__ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ __ext4_check_blockref(__func__, __LINE__, inode, \
(__le32 *)(bh)->b_data, \
EXT4_ADDR_PER_BLOCK((inode)->i_sb)) EXT4_ADDR_PER_BLOCK((inode)->i_sb))
#define ext4_check_inode_blockref(inode) \ #define ext4_check_inode_blockref(inode) \
__ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ __ext4_check_blockref(__func__, __LINE__, inode, \
EXT4_I(inode)->i_data, \
EXT4_NDIR_BLOCKS) EXT4_NDIR_BLOCKS)
/** /**
@ -1128,20 +1134,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
ext4_discard_preallocations(inode); ext4_discard_preallocations(inode);
} }
static int check_block_validity(struct inode *inode, const char *func, static int __check_block_validity(struct inode *inode, const char *func,
unsigned int line,
struct ext4_map_blocks *map) struct ext4_map_blocks *map)
{ {
if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
map->m_len)) { map->m_len)) {
ext4_error_inode(func, inode, ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu " "lblock %lu mapped to illegal pblock "
"(length %d)", (unsigned long) map->m_lblk, "(length %d)", (unsigned long) map->m_lblk,
map->m_pblk, map->m_len); map->m_len);
return -EIO; return -EIO;
} }
return 0; return 0;
} }
#define check_block_validity(inode, map) \
__check_block_validity((inode), __func__, __LINE__, (map))
/* /*
* Return the number of contiguous dirty pages in a given inode * Return the number of contiguous dirty pages in a given inode
* starting at page frame idx. * starting at page frame idx.
@ -1244,7 +1254,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
up_read((&EXT4_I(inode)->i_data_sem)); up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret = check_block_validity(inode, __func__, map); int ret = check_block_validity(inode, map);
if (ret != 0) if (ret != 0)
return ret; return ret;
} }
@ -1324,9 +1334,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
up_write((&EXT4_I(inode)->i_data_sem)); up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
int ret = check_block_validity(inode, int ret = check_block_validity(inode, map);
"ext4_map_blocks_after_alloc",
map);
if (ret != 0) if (ret != 0)
return ret; return ret;
} }
@ -1519,9 +1527,25 @@ static int walk_page_buffers(handle_t *handle,
static int do_journal_get_write_access(handle_t *handle, static int do_journal_get_write_access(handle_t *handle,
struct buffer_head *bh) struct buffer_head *bh)
{ {
int dirty = buffer_dirty(bh);
int ret;
if (!buffer_mapped(bh) || buffer_freed(bh)) if (!buffer_mapped(bh) || buffer_freed(bh))
return 0; return 0;
return ext4_journal_get_write_access(handle, bh); /*
* __block_prepare_write() could have dirtied some buffers. Clean
* the dirty bit as jbd2_journal_get_write_access() could complain
* otherwise about fs integrity issues. Setting of the dirty bit
* by __block_prepare_write() isn't a real problem here as we clear
* the bit before releasing a page lock and thus writeback cannot
* ever write the buffer.
*/
if (dirty)
clear_buffer_dirty(bh);
ret = ext4_journal_get_write_access(handle, bh);
if (!ret && dirty)
ret = ext4_handle_dirty_metadata(handle, NULL, bh);
return ret;
} }
/* /*
@ -2194,7 +2218,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
BUG_ON(!handle); BUG_ON(!handle);
/* /*
* Call ext4_get_blocks() to allocate any delayed allocation * Call ext4_map_blocks() to allocate any delayed allocation
* blocks, or to convert an uninitialized extent to be * blocks, or to convert an uninitialized extent to be
* initialized (in the case where we have written into * initialized (in the case where we have written into
* one or more preallocated blocks). * one or more preallocated blocks).
@ -2203,7 +2227,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* indicate that we are on the delayed allocation path. This * indicate that we are on the delayed allocation path. This
* affects functions in many different parts of the allocation * affects functions in many different parts of the allocation
* call path. This flag exists primarily because we don't * call path. This flag exists primarily because we don't
* want to change *many* call functions, so ext4_get_blocks() * want to change *many* call functions, so ext4_map_blocks()
* will set the magic i_delalloc_reserved_flag once the * will set the magic i_delalloc_reserved_flag once the
* inode's allocation semaphore is taken. * inode's allocation semaphore is taken.
* *
@ -2221,6 +2245,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
if (blks < 0) { if (blks < 0) {
struct super_block *sb = mpd->inode->i_sb;
err = blks; err = blks;
/* /*
* If get block returns with error we simply * If get block returns with error we simply
@ -2231,7 +2257,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
return 0; return 0;
if (err == -ENOSPC && if (err == -ENOSPC &&
ext4_count_free_blocks(mpd->inode->i_sb)) { ext4_count_free_blocks(sb)) {
mpd->retval = err; mpd->retval = err;
return 0; return 0;
} }
@ -2243,16 +2269,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* writepage and writepages will again try to write * writepage and writepages will again try to write
* the same. * the same.
*/ */
ext4_msg(mpd->inode->i_sb, KERN_CRIT, if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
"delayed block allocation failed for inode %lu at " ext4_msg(sb, KERN_CRIT,
"logical offset %llu with max blocks %zd with " "delayed block allocation failed for inode %lu "
"error %d", mpd->inode->i_ino, "at logical offset %llu with max blocks %zd "
(unsigned long long) next, "with error %d", mpd->inode->i_ino,
mpd->b_size >> mpd->inode->i_blkbits, err); (unsigned long long) next,
printk(KERN_CRIT "This should not happen!! " mpd->b_size >> mpd->inode->i_blkbits, err);
"Data will be lost\n"); ext4_msg(sb, KERN_CRIT,
if (err == -ENOSPC) { "This should not happen!! Data will be lost\n");
ext4_print_free_blocks(mpd->inode); if (err == -ENOSPC)
ext4_print_free_blocks(mpd->inode);
} }
/* invalidate all the pages */ /* invalidate all the pages */
ext4_da_block_invalidatepages(mpd, next, ext4_da_block_invalidatepages(mpd, next,
@ -2320,7 +2347,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
* XXX Don't go larger than mballoc is willing to allocate * XXX Don't go larger than mballoc is willing to allocate
* This is a stopgap solution. We eventually need to fold * This is a stopgap solution. We eventually need to fold
* mpage_da_submit_io() into this function and then call * mpage_da_submit_io() into this function and then call
* ext4_get_blocks() multiple times in a loop * ext4_map_blocks() multiple times in a loop
*/ */
if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
goto flush_it; goto flush_it;
@ -2553,18 +2580,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
/* /*
* This function is used as a standard get_block_t calback function * This function is used as a standard get_block_t calback function
* when there is no desire to allocate any blocks. It is used as a * when there is no desire to allocate any blocks. It is used as a
* callback function for block_prepare_write(), nobh_writepage(), and * callback function for block_prepare_write() and block_write_full_page().
* block_write_full_page(). These functions should only try to map a * These functions should only try to map a single block at a time.
* single block at a time.
* *
* Since this function doesn't do block allocations even if the caller * Since this function doesn't do block allocations even if the caller
* requests it by passing in create=1, it is critically important that * requests it by passing in create=1, it is critically important that
* any caller checks to make sure that any buffer heads are returned * any caller checks to make sure that any buffer heads are returned
* by this function are either all already mapped or marked for * by this function are either all already mapped or marked for
* delayed allocation before calling nobh_writepage() or * delayed allocation before calling block_write_full_page(). Otherwise,
* block_write_full_page(). Otherwise, b_blocknr could be left * b_blocknr could be left unitialized, and the page write functions will
* unitialized, and the page write functions will be taken by * be taken by surprise.
* surprise.
*/ */
static int noalloc_get_block_write(struct inode *inode, sector_t iblock, static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
@ -2749,9 +2774,7 @@ static int ext4_writepage(struct page *page,
return __ext4_journalled_writepage(page, len); return __ext4_journalled_writepage(page, len);
} }
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) if (page_bufs && buffer_uninit(page_bufs)) {
ret = nobh_writepage(page, noalloc_get_block_write, wbc);
else if (page_bufs && buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode); ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write, ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write); wbc, ext4_end_io_buffer_write);
@ -3146,13 +3169,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
int ret, retries = 0; int ret, retries = 0;
struct page *page; struct page *page;
pgoff_t index; pgoff_t index;
unsigned from, to;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
handle_t *handle; handle_t *handle;
index = pos >> PAGE_CACHE_SHIFT; index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
if (ext4_nonda_switch(inode->i_sb)) { if (ext4_nonda_switch(inode->i_sb)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC; *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@ -3668,6 +3688,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
return ret; return ret;
} }
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */ /* clear the DIO AIO unwritten flag */
io->flag = 0; io->flag = 0;
return ret; return ret;
@ -3767,6 +3789,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
io->offset = 0; io->offset = 0;
io->size = 0; io->size = 0;
io->page = NULL; io->page = NULL;
io->iocb = NULL;
io->result = 0;
INIT_WORK(&io->work, ext4_end_io_work); INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list); INIT_LIST_HEAD(&io->list);
} }
@ -3796,12 +3820,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
if (io_end->flag != EXT4_IO_UNWRITTEN){ if (io_end->flag != EXT4_IO_UNWRITTEN){
ext4_free_io_end(io_end); ext4_free_io_end(io_end);
iocb->private = NULL; iocb->private = NULL;
goto out; out:
if (is_async)
aio_complete(iocb, ret, 0);
return;
} }
io_end->offset = offset; io_end->offset = offset;
io_end->size = size; io_end->size = size;
io_end->flag = EXT4_IO_UNWRITTEN; if (is_async) {
io_end->iocb = iocb;
io_end->result = ret;
}
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
/* queue the work to convert unwritten extents to written */ /* queue the work to convert unwritten extents to written */
@ -3813,9 +3843,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
list_add_tail(&io_end->list, &ei->i_completed_io_list); list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
iocb->private = NULL; iocb->private = NULL;
out:
if (is_async)
aio_complete(iocb, ret, 0);
} }
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@ -3941,7 +3968,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
return -ENOMEM; return -ENOMEM;
/* /*
* we save the io structure for current async * we save the io structure for current async
* direct IO, so that later ext4_get_blocks() * direct IO, so that later ext4_map_blocks()
* could flag the io structure whether there * could flag the io structure whether there
* is a unwritten extents needs to be converted * is a unwritten extents needs to be converted
* when IO is completed. * when IO is completed.
@ -4132,17 +4159,6 @@ int ext4_block_truncate_page(handle_t *handle,
length = blocksize - (offset & (blocksize - 1)); length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
/*
* For "nobh" option, we can only work if we don't need to
* read-in the page - otherwise we create buffers to do the IO.
*/
if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
ext4_should_writeback_data(inode) && PageUptodate(page)) {
zero_user(page, offset, length);
set_page_dirty(page);
goto unlock;
}
if (!page_has_buffers(page)) if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0); create_empty_buffers(page, blocksize, 0);
@ -4492,9 +4508,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
* (should be rare). * (should be rare).
*/ */
if (!bh) { if (!bh) {
EXT4_ERROR_INODE(inode, EXT4_ERROR_INODE_BLOCK(inode, nr,
"Read failure block=%llu", "Read failure");
(unsigned long long) nr);
continue; continue;
} }
@ -4505,27 +4520,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
(__le32 *) bh->b_data + addr_per_block, (__le32 *) bh->b_data + addr_per_block,
depth); depth);
/*
* We've probably journalled the indirect block several
* times during the truncate. But it's no longer
* needed and we now drop it from the transaction via
* jbd2_journal_revoke().
*
* That's easy if it's exclusively part of this
* transaction. But if it's part of the committing
* transaction then jbd2_journal_forget() will simply
* brelse() it. That means that if the underlying
* block is reallocated in ext4_get_block(),
* unmap_underlying_metadata() will find this block
* and will try to get rid of it. damn, damn.
*
* If this block has already been committed to the
* journal, a revoke record will be written. And
* revoke records must be emitted *before* clearing
* this block's bit in the bitmaps.
*/
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
/* /*
* Everything below this this pointer has been * Everything below this this pointer has been
* released. Now let this top-of-subtree go. * released. Now let this top-of-subtree go.
@ -4550,8 +4544,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
blocks_for_truncate(inode)); blocks_for_truncate(inode));
} }
/*
* The forget flag here is critical because if
* we are journaling (and not doing data
* journaling), we have to make sure a revoke
* record is written to prevent the journal
* replay from overwriting the (former)
* indirect block if it gets reallocated as a
* data block. This must happen in the same
* transaction where the data blocks are
* actually freed.
*/
ext4_free_blocks(handle, inode, 0, nr, 1, ext4_free_blocks(handle, inode, 0, nr, 1,
EXT4_FREE_BLOCKS_METADATA); EXT4_FREE_BLOCKS_METADATA|
EXT4_FREE_BLOCKS_FORGET);
if (parent_bh) { if (parent_bh) {
/* /*
@ -4809,8 +4815,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
bh = sb_getblk(sb, block); bh = sb_getblk(sb, block);
if (!bh) { if (!bh) {
EXT4_ERROR_INODE(inode, "unable to read inode block - " EXT4_ERROR_INODE_BLOCK(inode, block,
"block %llu", block); "unable to read itable block");
return -EIO; return -EIO;
} }
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
@ -4908,8 +4914,8 @@ make_io:
submit_bh(READ_META, bh); submit_bh(READ_META, bh);
wait_on_buffer(bh); wait_on_buffer(bh);
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
EXT4_ERROR_INODE(inode, "unable to read inode " EXT4_ERROR_INODE_BLOCK(inode, block,
"block %llu", block); "unable to read itable block");
brelse(bh); brelse(bh);
return -EIO; return -EIO;
} }
@ -4980,7 +4986,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
/* we are using combined 48 bit field */ /* we are using combined 48 bit field */
i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
le32_to_cpu(raw_inode->i_blocks_lo); le32_to_cpu(raw_inode->i_blocks_lo);
if (ei->i_flags & EXT4_HUGE_FILE_FL) { if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
/* i_blocks represent file system block size */ /* i_blocks represent file system block size */
return i_blocks << (inode->i_blkbits - 9); return i_blocks << (inode->i_blkbits - 9);
} else { } else {
@ -5076,7 +5082,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
transaction_t *transaction; transaction_t *transaction;
tid_t tid; tid_t tid;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
if (journal->j_running_transaction) if (journal->j_running_transaction)
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
else else
@ -5085,7 +5091,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
tid = transaction->t_tid; tid = transaction->t_tid;
else else
tid = journal->j_commit_sequence; tid = journal->j_commit_sequence;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
ei->i_sync_tid = tid; ei->i_sync_tid = tid;
ei->i_datasync_tid = tid; ei->i_datasync_tid = tid;
} }
@ -5130,7 +5136,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_file_acl); ei->i_file_acl);
ret = -EIO; ret = -EIO;
goto bad_inode; goto bad_inode;
} else if (ei->i_flags & EXT4_EXTENTS_FL) { } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
(S_ISLNK(inode->i_mode) && (S_ISLNK(inode->i_mode) &&
!ext4_inode_is_fast_symlink(inode))) !ext4_inode_is_fast_symlink(inode)))
@ -5410,9 +5416,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
if (wbc->sync_mode == WB_SYNC_ALL) if (wbc->sync_mode == WB_SYNC_ALL)
sync_dirty_buffer(iloc.bh); sync_dirty_buffer(iloc.bh);
if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
EXT4_ERROR_INODE(inode, EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
"IO error syncing inode (block=%llu)", "IO error syncing inode");
(unsigned long long) iloc.bh->b_blocknr);
err = -EIO; err = -EIO;
} }
brelse(iloc.bh); brelse(iloc.bh);
@ -5487,10 +5492,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (attr->ia_size > sbi->s_bitmap_maxbytes) { if (attr->ia_size > sbi->s_bitmap_maxbytes)
error = -EFBIG; return -EFBIG;
goto err_out;
}
} }
} }
@ -5692,7 +5695,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
* Calculate the journal credits for a chunk of data modification. * Calculate the journal credits for a chunk of data modification.
* *
* This is called from DIO, fallocate or whoever calling * This is called from DIO, fallocate or whoever calling
* ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
* *
* journal buffers for data blocks are not included here, as DIO * journal buffers for data blocks are not included here, as DIO
* and fallocate do no need to journal data buffers. * and fallocate do no need to journal data buffers.
@ -5758,7 +5761,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
{ {
struct ext4_inode *raw_inode; struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header; struct ext4_xattr_ibody_header *header;
struct ext4_xattr_entry *entry;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
return 0; return 0;
@ -5766,7 +5768,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
raw_inode = ext4_raw_inode(&iloc); raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode); header = IHDR(inode, raw_inode);
entry = IFIRST(header);
/* No extended attributes present */ /* No extended attributes present */
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||

View File

@ -446,10 +446,11 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += first + i; blocknr += first + i;
ext4_grp_locked_error(sb, e4b->bd_group, ext4_grp_locked_error(sb, e4b->bd_group,
__func__, "double-free of inode" inode ? inode->i_ino : 0,
" %lu's block %llu(bit %u in group %u)", blocknr,
inode ? inode->i_ino : 0, blocknr, "freeing block already freed "
first + i, e4b->bd_group); "(bit %u)",
first + i);
} }
mb_clear_bit(first + i, e4b->bd_info->bb_bitmap); mb_clear_bit(first + i, e4b->bd_info->bb_bitmap);
} }
@ -712,9 +713,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
grp->bb_fragments = fragments; grp->bb_fragments = fragments;
if (free != grp->bb_free) { if (free != grp->bb_free) {
ext4_grp_locked_error(sb, group, __func__, ext4_grp_locked_error(sb, group, 0, 0,
"EXT4-fs: group %u: %u blocks in bitmap, %u in gd", "%u blocks in bitmap, %u in gd",
group, free, grp->bb_free); free, grp->bb_free);
/* /*
* If we intent to continue, we consider group descritor * If we intent to continue, we consider group descritor
* corrupt and update bb_free using bitmap value * corrupt and update bb_free using bitmap value
@ -1296,10 +1297,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
blocknr = ext4_group_first_block_no(sb, e4b->bd_group); blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += block; blocknr += block;
ext4_grp_locked_error(sb, e4b->bd_group, ext4_grp_locked_error(sb, e4b->bd_group,
__func__, "double-free of inode" inode ? inode->i_ino : 0,
" %lu's block %llu(bit %u in group %u)", blocknr,
inode ? inode->i_ino : 0, blocknr, block, "freeing already freed block "
e4b->bd_group); "(bit %u)", block);
} }
mb_clear_bit(block, EXT4_MB_BITMAP(e4b)); mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
e4b->bd_info->bb_counters[order]++; e4b->bd_info->bb_counters[order]++;
@ -1788,8 +1789,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we * free blocks even though group info says we
* we have free blocks * we have free blocks
*/ */
ext4_grp_locked_error(sb, e4b->bd_group, ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
__func__, "%d free blocks as per " "%d free blocks as per "
"group info. But bitmap says 0", "group info. But bitmap says 0",
free); free);
break; break;
@ -1798,8 +1799,8 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0); BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) { if (free < ex.fe_len) {
ext4_grp_locked_error(sb, e4b->bd_group, ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
__func__, "%d free blocks as per " "%d free blocks as per "
"group info. But got %d blocks", "group info. But got %d blocks",
free, ex.fe_len); free, ex.fe_len);
/* /*
@ -1821,8 +1822,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
/* /*
* This is a special case for storages like raid5 * This is a special case for storages like raid5
* we try to find stripe-aligned chunks for stripe-size requests * we try to find stripe-aligned chunks for stripe-size-multiple requests
* XXX should do so at least for multiples of stripe size as well
*/ */
static noinline_for_stack static noinline_for_stack
void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
@ -1999,7 +1999,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ext4_group_t ngroups, group, i; ext4_group_t ngroups, group, i;
int cr; int cr;
int err = 0; int err = 0;
int bsbits;
struct ext4_sb_info *sbi; struct ext4_sb_info *sbi;
struct super_block *sb; struct super_block *sb;
struct ext4_buddy e4b; struct ext4_buddy e4b;
@ -2041,8 +2040,6 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
ac->ac_2order = i - 1; ac->ac_2order = i - 1;
} }
bsbits = ac->ac_sb->s_blocksize_bits;
/* if stream allocation is enabled, use global goal */ /* if stream allocation is enabled, use global goal */
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) { if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
/* TBD: may be hot point */ /* TBD: may be hot point */
@ -2094,8 +2091,8 @@ repeat:
ac->ac_groups_scanned++; ac->ac_groups_scanned++;
if (cr == 0) if (cr == 0)
ext4_mb_simple_scan_group(ac, &e4b); ext4_mb_simple_scan_group(ac, &e4b);
else if (cr == 1 && else if (cr == 1 && sbi->s_stripe &&
ac->ac_g_ex.fe_len == sbi->s_stripe) !(ac->ac_g_ex.fe_len % sbi->s_stripe))
ext4_mb_scan_aligned(ac, &e4b); ext4_mb_scan_aligned(ac, &e4b);
else else
ext4_mb_complex_scan_group(ac, &e4b); ext4_mb_complex_scan_group(ac, &e4b);
@ -2221,7 +2218,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
rc = seq_open(file, &ext4_mb_seq_groups_ops); rc = seq_open(file, &ext4_mb_seq_groups_ops);
if (rc == 0) { if (rc == 0) {
struct seq_file *m = (struct seq_file *)file->private_data; struct seq_file *m = file->private_data;
m->private = sb; m->private = sb;
} }
return rc; return rc;
@ -2560,6 +2557,22 @@ int ext4_mb_release(struct super_block *sb)
return 0; return 0;
} }
/*
 * Issue a discard request for @count blocks that start at the
 * group-relative block @block inside block group @block_group.
 *
 * The group-relative offset is turned into a filesystem block number by
 * adding ext4_group_first_block_no(), the request is traced, and it is
 * then handed to sb_issue_discard().  When that call reports that discard
 * is not supported, a warning is logged and the DISCARD mount option is
 * cleared.  Any other return value is ignored.
 */
static inline void ext4_issue_discard(struct super_block *sb,
		ext4_group_t block_group, ext4_grpblk_t block, int count)
{
	int ret;
	ext4_fsblk_t discard_block;

	discard_block = block + ext4_group_first_block_no(sb, block_group);
	trace_ext4_discard_blocks(sb,
			(unsigned long long) discard_block, count);
	ret = sb_issue_discard(sb, discard_block, count);
	/*
	 * Failures are reported as negative errno values, so the check has
	 * to be against -EOPNOTSUPP; comparing with the positive constant
	 * can never match.
	 */
	if (ret == -EOPNOTSUPP) {
		ext4_warning(sb, "discard not supported, disabling");
		clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
	}
}
/* /*
* This function is called by the jbd2 layer once the commit has finished, * This function is called by the jbd2 layer once the commit has finished,
* so we know we can free the blocks that were released with that commit. * so we know we can free the blocks that were released with that commit.
@ -2579,22 +2592,9 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
mb_debug(1, "gonna free %u blocks in group %u (0x%p):", mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
entry->count, entry->group, entry); entry->count, entry->group, entry);
if (test_opt(sb, DISCARD)) { if (test_opt(sb, DISCARD))
int ret; ext4_issue_discard(sb, entry->group,
ext4_fsblk_t discard_block; entry->start_blk, entry->count);
discard_block = entry->start_blk +
ext4_group_first_block_no(sb, entry->group);
trace_ext4_discard_blocks(sb,
(unsigned long long)discard_block,
entry->count);
ret = sb_issue_discard(sb, discard_block, entry->count);
if (ret == EOPNOTSUPP) {
ext4_warning(sb,
"discard not supported, disabling");
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
}
}
err = ext4_mb_load_buddy(sb, entry->group, &e4b); err = ext4_mb_load_buddy(sb, entry->group, &e4b);
/* we expect to find existing buddy because it's pinned */ /* we expect to find existing buddy because it's pinned */
@ -2712,7 +2712,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
handle_t *handle, unsigned int reserv_blks) handle_t *handle, unsigned int reserv_blks)
{ {
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct ext4_super_block *es;
struct ext4_group_desc *gdp; struct ext4_group_desc *gdp;
struct buffer_head *gdp_bh; struct buffer_head *gdp_bh;
struct ext4_sb_info *sbi; struct ext4_sb_info *sbi;
@ -2725,8 +2724,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
sb = ac->ac_sb; sb = ac->ac_sb;
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
es = sbi->s_es;
err = -EIO; err = -EIO;
bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group); bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
@ -2812,7 +2809,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh); err = ext4_handle_dirty_metadata(handle, NULL, gdp_bh);
out_err: out_err:
sb->s_dirt = 1; ext4_mark_super_dirty(sb);
brelse(bitmap_bh); brelse(bitmap_bh);
return err; return err;
} }
@ -2850,7 +2847,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
int bsbits, max; int bsbits, max;
ext4_lblk_t end; ext4_lblk_t end;
loff_t size, orig_size, start_off; loff_t size, orig_size, start_off;
ext4_lblk_t start, orig_start; ext4_lblk_t start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_prealloc_space *pa; struct ext4_prealloc_space *pa;
@ -2881,6 +2878,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
size = size << bsbits; size = size << bsbits;
if (size < i_size_read(ac->ac_inode)) if (size < i_size_read(ac->ac_inode))
size = i_size_read(ac->ac_inode); size = i_size_read(ac->ac_inode);
orig_size = size;
/* max size of free chunks */ /* max size of free chunks */
max = 2 << bsbits; max = 2 << bsbits;
@ -2922,8 +2920,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits; start_off = (loff_t)ac->ac_o_ex.fe_logical << bsbits;
size = ac->ac_o_ex.fe_len << bsbits; size = ac->ac_o_ex.fe_len << bsbits;
} }
orig_size = size = size >> bsbits; size = size >> bsbits;
orig_start = start = start_off >> bsbits; start = start_off >> bsbits;
/* don't cover already allocated blocks in selected range */ /* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) { if (ar->pleft && start <= ar->lleft) {
@ -3547,7 +3545,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
ext4_group_t group; ext4_group_t group;
ext4_grpblk_t bit; ext4_grpblk_t bit;
unsigned long long grp_blk_start; unsigned long long grp_blk_start;
sector_t start;
int err = 0; int err = 0;
int free = 0; int free = 0;
@ -3567,10 +3564,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
if (bit >= end) if (bit >= end)
break; break;
next = mb_find_next_bit(bitmap_bh->b_data, end, bit); next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
start = ext4_group_first_block_no(sb, group) + bit;
mb_debug(1, " free preallocated %u/%u in group %u\n", mb_debug(1, " free preallocated %u/%u in group %u\n",
(unsigned) start, (unsigned) next - bit, (unsigned) ext4_group_first_block_no(sb, group) + bit,
(unsigned) group); (unsigned) next - bit, (unsigned) group);
free += next - bit; free += next - bit;
if (ac) { if (ac) {
@ -3581,7 +3577,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
trace_ext4_mballoc_discard(ac); trace_ext4_mballoc_discard(ac);
} }
trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
next - bit); next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit); mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1; bit = next + 1;
@ -3591,8 +3587,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
pa, (unsigned long) pa->pa_lstart, pa, (unsigned long) pa->pa_lstart,
(unsigned long) pa->pa_pstart, (unsigned long) pa->pa_pstart,
(unsigned long) pa->pa_len); (unsigned long) pa->pa_len);
ext4_grp_locked_error(sb, group, ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
__func__, "free %u, pa_free %u",
free, pa->pa_free); free, pa->pa_free);
/* /*
* pa is already deleted so we use the value obtained * pa is already deleted so we use the value obtained
@ -3613,7 +3608,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
ext4_group_t group; ext4_group_t group;
ext4_grpblk_t bit; ext4_grpblk_t bit;
trace_ext4_mb_release_group_pa(ac, pa); trace_ext4_mb_release_group_pa(sb, ac, pa);
BUG_ON(pa->pa_deleted == 0); BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0); BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@ -3889,6 +3884,9 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
struct super_block *sb = ac->ac_sb; struct super_block *sb = ac->ac_sb;
ext4_group_t ngroups, i; ext4_group_t ngroups, i;
if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
return;
printk(KERN_ERR "EXT4-fs: Can't allocate:" printk(KERN_ERR "EXT4-fs: Can't allocate:"
" Allocation context details:\n"); " Allocation context details:\n");
printk(KERN_ERR "EXT4-fs: status %d flags %d\n", printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
@ -4255,7 +4253,7 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed)
* to usual allocation * to usual allocation
*/ */
ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
struct ext4_allocation_request *ar, int *errp) struct ext4_allocation_request *ar, int *errp)
{ {
int freed; int freed;
struct ext4_allocation_context *ac = NULL; struct ext4_allocation_context *ac = NULL;
@ -4299,7 +4297,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
inquota = ar->len; inquota = ar->len;
if (ar->len == 0) { if (ar->len == 0) {
*errp = -EDQUOT; *errp = -EDQUOT;
goto out3; goto out;
} }
} }
@ -4307,13 +4305,13 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
if (!ac) { if (!ac) {
ar->len = 0; ar->len = 0;
*errp = -ENOMEM; *errp = -ENOMEM;
goto out1; goto out;
} }
*errp = ext4_mb_initialize_context(ac, ar); *errp = ext4_mb_initialize_context(ac, ar);
if (*errp) { if (*errp) {
ar->len = 0; ar->len = 0;
goto out2; goto out;
} }
ac->ac_op = EXT4_MB_HISTORY_PREALLOC; ac->ac_op = EXT4_MB_HISTORY_PREALLOC;
@ -4322,7 +4320,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
ext4_mb_normalize_request(ac, ar); ext4_mb_normalize_request(ac, ar);
repeat: repeat:
/* allocate space in core */ /* allocate space in core */
ext4_mb_regular_allocator(ac); *errp = ext4_mb_regular_allocator(ac);
if (*errp)
goto errout;
/* as we've just preallocated more space than /* as we've just preallocated more space than
* user requested orinally, we store allocated * user requested orinally, we store allocated
@ -4333,7 +4333,7 @@ repeat:
} }
if (likely(ac->ac_status == AC_STATUS_FOUND)) { if (likely(ac->ac_status == AC_STATUS_FOUND)) {
*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks);
if (*errp == -EAGAIN) { if (*errp == -EAGAIN) {
/* /*
* drop the reference that we took * drop the reference that we took
* in ext4_mb_use_best_found * in ext4_mb_use_best_found
@ -4344,12 +4344,10 @@ repeat:
ac->ac_b_ex.fe_len = 0; ac->ac_b_ex.fe_len = 0;
ac->ac_status = AC_STATUS_CONTINUE; ac->ac_status = AC_STATUS_CONTINUE;
goto repeat; goto repeat;
} else if (*errp) { } else if (*errp)
errout:
ext4_discard_allocated_blocks(ac); ext4_discard_allocated_blocks(ac);
ac->ac_b_ex.fe_len = 0; else {
ar->len = 0;
ext4_mb_show_ac(ac);
} else {
block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
ar->len = ac->ac_b_ex.fe_len; ar->len = ac->ac_b_ex.fe_len;
} }
@ -4358,19 +4356,19 @@ repeat:
if (freed) if (freed)
goto repeat; goto repeat;
*errp = -ENOSPC; *errp = -ENOSPC;
}
if (*errp) {
ac->ac_b_ex.fe_len = 0; ac->ac_b_ex.fe_len = 0;
ar->len = 0; ar->len = 0;
ext4_mb_show_ac(ac); ext4_mb_show_ac(ac);
} }
ext4_mb_release_context(ac); ext4_mb_release_context(ac);
out:
out2: if (ac)
kmem_cache_free(ext4_ac_cachep, ac); kmem_cache_free(ext4_ac_cachep, ac);
out1:
if (inquota && ar->len < inquota) if (inquota && ar->len < inquota)
dquot_free_block(ar->inode, inquota - ar->len); dquot_free_block(ar->inode, inquota - ar->len);
out3:
if (!ar->len) { if (!ar->len) {
if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
/* release all the reserved blocks if non delalloc */ /* release all the reserved blocks if non delalloc */
@ -4402,6 +4400,7 @@ static noinline_for_stack int
ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct ext4_free_data *new_entry) struct ext4_free_data *new_entry)
{ {
ext4_group_t group = e4b->bd_group;
ext4_grpblk_t block; ext4_grpblk_t block;
struct ext4_free_data *entry; struct ext4_free_data *entry;
struct ext4_group_info *db = e4b->bd_info; struct ext4_group_info *db = e4b->bd_info;
@ -4434,9 +4433,9 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
else if (block >= (entry->start_blk + entry->count)) else if (block >= (entry->start_blk + entry->count))
n = &(*n)->rb_right; n = &(*n)->rb_right;
else { else {
ext4_grp_locked_error(sb, e4b->bd_group, __func__, ext4_grp_locked_error(sb, group, 0,
"Double free of blocks %d (%d %d)", ext4_group_first_block_no(sb, group) + block,
block, entry->start_blk, entry->count); "Block already on to-be-freed list");
return 0; return 0;
} }
} }
@ -4494,7 +4493,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct ext4_allocation_context *ac = NULL; struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp; struct ext4_group_desc *gdp;
struct ext4_super_block *es;
unsigned long freed = 0; unsigned long freed = 0;
unsigned int overflow; unsigned int overflow;
ext4_grpblk_t bit; ext4_grpblk_t bit;
@ -4513,7 +4511,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
} }
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es;
if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) &&
!ext4_data_block_valid(sbi, block, count)) { !ext4_data_block_valid(sbi, block, count)) {
ext4_error(sb, "Freeing blocks not in datazone - " ext4_error(sb, "Freeing blocks not in datazone - "
@ -4647,6 +4644,8 @@ do_more:
mb_clear_bits(bitmap_bh->b_data, bit, count); mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count); mb_free_blocks(inode, &e4b, bit, count);
ext4_mb_return_to_preallocation(inode, &e4b, block, count); ext4_mb_return_to_preallocation(inode, &e4b, block, count);
if (test_opt(sb, DISCARD))
ext4_issue_discard(sb, block_group, bit, count);
} }
ret = ext4_free_blks_count(sb, gdp) + count; ret = ext4_free_blks_count(sb, gdp) + count;
@ -4680,7 +4679,7 @@ do_more:
put_bh(bitmap_bh); put_bh(bitmap_bh);
goto do_more; goto do_more;
} }
sb->s_dirt = 1; ext4_mark_super_dirty(sb);
error_return: error_return:
if (freed) if (freed)
dquot_free_block(inode, freed); dquot_free_block(inode, freed);

View File

@ -376,7 +376,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* We have the extent map build with the tmp inode. * We have the extent map build with the tmp inode.
* Now copy the i_data across * Now copy the i_data across
*/ */
ei->i_flags |= EXT4_EXTENTS_FL; ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS);
memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data)); memcpy(ei->i_data, tmp_ei->i_data, sizeof(ei->i_data));
/* /*

View File

@ -148,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
*/ */
static int static int
mext_check_null_inode(struct inode *inode1, struct inode *inode2, mext_check_null_inode(struct inode *inode1, struct inode *inode2,
const char *function) const char *function, unsigned int line)
{ {
int ret = 0; int ret = 0;
if (inode1 == NULL) { if (inode1 == NULL) {
__ext4_error(inode2->i_sb, function, __ext4_error(inode2->i_sb, function, line,
"Both inodes should not be NULL: " "Both inodes should not be NULL: "
"inode1 NULL inode2 %lu", inode2->i_ino); "inode1 NULL inode2 %lu", inode2->i_ino);
ret = -EIO; ret = -EIO;
} else if (inode2 == NULL) { } else if (inode2 == NULL) {
__ext4_error(inode1->i_sb, function, __ext4_error(inode1->i_sb, function, line,
"Both inodes should not be NULL: " "Both inodes should not be NULL: "
"inode1 %lu inode2 NULL", inode1->i_ino); "inode1 %lu inode2 NULL", inode1->i_ino);
ret = -EIO; ret = -EIO;
@ -1084,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
BUG_ON(inode1 == NULL && inode2 == NULL); BUG_ON(inode1 == NULL && inode2 == NULL);
ret = mext_check_null_inode(inode1, inode2, __func__); ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0) if (ret < 0)
goto out; goto out;
@ -1121,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
BUG_ON(inode1 == NULL && inode2 == NULL); BUG_ON(inode1 == NULL && inode2 == NULL);
ret = mext_check_null_inode(inode1, inode2, __func__); ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
if (ret < 0) if (ret < 0)
goto out; goto out;

View File

@ -179,30 +179,6 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode); struct inode *inode);
/*
 * Decode the 16-bit on-disk record length @dlen into a byte count.
 *
 * A stored value of 0 or EXT4_MAX_REC_LEN yields @blocksize.  For every
 * other value the two lowest stored bits are moved up to bits 16-17 and
 * combined with the remaining bits of the stored value.
 */
unsigned int ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
{
	const unsigned raw = le16_to_cpu(dlen);

	if (raw == 0 || raw == EXT4_MAX_REC_LEN)
		return blocksize;

	return ((raw & 3) << 16) | (raw & 0xfffc);
}
/*
 * Encode the record length @len (in bytes) into its 16-bit on-disk form.
 *
 * BUG()s when @len is not a multiple of four, when @len exceeds
 * @blocksize, or when @blocksize is larger than 1 << 18.
 *
 * Lengths below 65536 are stored unchanged.  A length equal to
 * @blocksize is stored as EXT4_MAX_REC_LEN for a 65536-byte block and as
 * 0 otherwise.  Any other length has bits 16-17 folded into the two
 * lowest bits of the stored value.
 */
__le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
{
	unsigned encoded;

	if ((len & 3) || blocksize > (1 << 18) || len > blocksize)
		BUG();

	if (len < 65536)
		encoded = len;
	else if (len != blocksize)
		encoded = (len & 0xfffc) | ((len >> 16) & 3);
	else if (blocksize == 65536)
		encoded = EXT4_MAX_REC_LEN;
	else
		encoded = 0;

	return cpu_to_le16(encoded);
}
/* /*
* p is at least 6 bytes before the end of page * p is at least 6 bytes before the end of page
*/ */
@ -605,7 +581,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
dir->i_sb->s_blocksize - dir->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(0)); EXT4_DIR_REC_LEN(0));
for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
if (!ext4_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, if (!ext4_check_dir_entry(dir, de, bh,
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) { +((char *)de - bh->b_data))) {
/* On error, skip the f_pos to the next block. */ /* On error, skip the f_pos to the next block. */
@ -844,8 +820,7 @@ static inline int search_dirblock(struct buffer_head *bh,
if ((char *) de + namelen <= dlimit && if ((char *) de + namelen <= dlimit &&
ext4_match (namelen, name, de)) { ext4_match (namelen, name, de)) {
/* found a match - just to be sure, do a full check */ /* found a match - just to be sure, do a full check */
if (!ext4_check_dir_entry("ext4_find_entry", if (!ext4_check_dir_entry(dir, de, bh, offset))
dir, de, bh, offset))
return -1; return -1;
*res_dir = de; *res_dir = de;
return 1; return 1;
@ -1019,7 +994,7 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
int off = (block << EXT4_BLOCK_SIZE_BITS(sb)) int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
+ ((char *) de - bh->b_data); + ((char *) de - bh->b_data);
if (!ext4_check_dir_entry(__func__, dir, de, bh, off)) { if (!ext4_check_dir_entry(dir, de, bh, off)) {
brelse(bh); brelse(bh);
*err = ERR_BAD_DX_DIR; *err = ERR_BAD_DX_DIR;
goto errout; goto errout;
@ -1088,7 +1063,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
struct dentry *ext4_get_parent(struct dentry *child) struct dentry *ext4_get_parent(struct dentry *child)
{ {
__u32 ino; __u32 ino;
struct inode *inode;
static const struct qstr dotdot = { static const struct qstr dotdot = {
.name = "..", .name = "..",
.len = 2, .len = 2,
@ -1097,7 +1071,6 @@ struct dentry *ext4_get_parent(struct dentry *child)
struct buffer_head *bh; struct buffer_head *bh;
bh = ext4_find_entry(child->d_inode, &dotdot, &de); bh = ext4_find_entry(child->d_inode, &dotdot, &de);
inode = NULL;
if (!bh) if (!bh)
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
ino = le32_to_cpu(de->inode); ino = le32_to_cpu(de->inode);
@ -1305,8 +1278,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *)bh->b_data; de = (struct ext4_dir_entry_2 *)bh->b_data;
top = bh->b_data + blocksize - reclen; top = bh->b_data + blocksize - reclen;
while ((char *) de <= top) { while ((char *) de <= top) {
if (!ext4_check_dir_entry("ext4_add_entry", dir, de, if (!ext4_check_dir_entry(dir, de, bh, offset))
bh, offset))
return -EIO; return -EIO;
if (ext4_match(namelen, name, de)) if (ext4_match(namelen, name, de))
return -EEXIST; return -EEXIST;
@ -1673,7 +1645,7 @@ static int ext4_delete_entry(handle_t *handle,
pde = NULL; pde = NULL;
de = (struct ext4_dir_entry_2 *) bh->b_data; de = (struct ext4_dir_entry_2 *) bh->b_data;
while (i < bh->b_size) { while (i < bh->b_size) {
if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i)) if (!ext4_check_dir_entry(dir, de, bh, i))
return -EIO; return -EIO;
if (de == de_del) { if (de == de_del) {
BUFFER_TRACE(bh, "get_write_access"); BUFFER_TRACE(bh, "get_write_access");
@ -1956,7 +1928,7 @@ static int empty_dir(struct inode *inode)
} }
de = (struct ext4_dir_entry_2 *) bh->b_data; de = (struct ext4_dir_entry_2 *) bh->b_data;
} }
if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) { if (!ext4_check_dir_entry(inode, de, bh, offset)) {
de = (struct ext4_dir_entry_2 *)(bh->b_data + de = (struct ext4_dir_entry_2 *)(bh->b_data +
sb->s_blocksize); sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1; offset = (offset | (sb->s_blocksize - 1)) + 1;

View File

@ -921,8 +921,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
&sbi->s_flex_groups[flex_group].free_inodes); &sbi->s_flex_groups[flex_group].free_inodes);
} }
ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); ext4_handle_dirty_super(handle, sb);
sb->s_dirt = 1;
exit_journal: exit_journal:
mutex_unlock(&sbi->s_resize_lock); mutex_unlock(&sbi->s_resize_lock);
@ -953,7 +952,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count) ext4_fsblk_t n_blocks_count)
{ {
ext4_fsblk_t o_blocks_count; ext4_fsblk_t o_blocks_count;
ext4_group_t o_groups_count;
ext4_grpblk_t last; ext4_grpblk_t last;
ext4_grpblk_t add; ext4_grpblk_t add;
struct buffer_head *bh; struct buffer_head *bh;
@ -965,7 +963,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
* yet: we're going to revalidate es->s_blocks_count after * yet: we're going to revalidate es->s_blocks_count after
* taking the s_resize_lock below. */ * taking the s_resize_lock below. */
o_blocks_count = ext4_blocks_count(es); o_blocks_count = ext4_blocks_count(es);
o_groups_count = EXT4_SB(sb)->s_groups_count;
if (test_opt(sb, DEBUG)) if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n",
@ -1045,13 +1042,12 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
goto exit_put; goto exit_put;
} }
ext4_blocks_count_set(es, o_blocks_count + add); ext4_blocks_count_set(es, o_blocks_count + add);
ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
sb->s_dirt = 1;
mutex_unlock(&EXT4_SB(sb)->s_resize_lock); mutex_unlock(&EXT4_SB(sb)->s_resize_lock);
ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add); o_blocks_count + add);
/* We add the blocks to the bitmap and set the group need init bit */ /* We add the blocks to the bitmap and set the group need init bit */
ext4_add_groupblocks(handle, sb, o_blocks_count, add); ext4_add_groupblocks(handle, sb, o_blocks_count, add);
ext4_handle_dirty_super(handle, sb);
ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add); o_blocks_count + add);
if ((err = ext4_journal_stop(handle))) if ((err = ext4_journal_stop(handle)))

View File

@ -241,14 +241,14 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
if (sb->s_flags & MS_RDONLY) if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS); return ERR_PTR(-EROFS);
vfs_check_frozen(sb, SB_FREEZE_WRITE); vfs_check_frozen(sb, SB_FREEZE_TRANS);
/* Special case here: if the journal has aborted behind our /* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to * backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */ * take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal; journal = EXT4_SB(sb)->s_journal;
if (journal) { if (journal) {
if (is_journal_aborted(journal)) { if (is_journal_aborted(journal)) {
ext4_abort(sb, __func__, "Detected aborted journal"); ext4_abort(sb, "Detected aborted journal");
return ERR_PTR(-EROFS); return ERR_PTR(-EROFS);
} }
return jbd2_journal_start(journal, nblocks); return jbd2_journal_start(journal, nblocks);
@ -262,7 +262,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
* that sync() will call the filesystem's write_super callback if * that sync() will call the filesystem's write_super callback if
* appropriate. * appropriate.
*/ */
int __ext4_journal_stop(const char *where, handle_t *handle) int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{ {
struct super_block *sb; struct super_block *sb;
int err; int err;
@ -279,12 +279,13 @@ int __ext4_journal_stop(const char *where, handle_t *handle)
if (!err) if (!err)
err = rc; err = rc;
if (err) if (err)
__ext4_std_error(sb, where, err); __ext4_std_error(sb, where, line, err);
return err; return err;
} }
void ext4_journal_abort_handle(const char *caller, const char *err_fn, void ext4_journal_abort_handle(const char *caller, unsigned int line,
struct buffer_head *bh, handle_t *handle, int err) const char *err_fn, struct buffer_head *bh,
handle_t *handle, int err)
{ {
char nbuf[16]; char nbuf[16];
const char *errstr = ext4_decode_error(NULL, err, nbuf); const char *errstr = ext4_decode_error(NULL, err, nbuf);
@ -300,12 +301,47 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
if (is_handle_aborted(handle)) if (is_handle_aborted(handle))
return; return;
printk(KERN_ERR "%s: aborting transaction: %s in %s\n", printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n",
caller, errstr, err_fn); caller, line, errstr, err_fn);
jbd2_journal_abort_handle(handle); jbd2_journal_abort_handle(handle);
} }
/*
 * Record an error raised at @func:@line in the superblock of @sb.
 *
 * Sets EXT4_ERROR_FS in both the in-memory mount state and the superblock
 * state field, refreshes the "last error" time/function/line fields and,
 * while no first-error time has been stored yet, copies the current
 * "last error" data (including whatever the last-error inode and block
 * fields hold at that moment) into the "first error" fields.  The error
 * counter is incremented; if it was zero beforehand, the s_err_report
 * timer is armed to fire 24 hours from now.
 *
 * This function only updates the superblock fields; it does not call
 * ext4_commit_super().
 */
static void __save_error_info(struct super_block *sb, const char *func,
			    unsigned int line)
{
	struct ext4_super_block *esb = EXT4_SB(sb)->s_es;

	/* Flag the error in the superblock copy and in the mount state. */
	esb->s_state |= cpu_to_le16(EXT4_ERROR_FS);
	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;

	/* The "last error" record is overwritten on every call. */
	esb->s_last_error_line = cpu_to_le32(line);
	strncpy(esb->s_last_error_func, func, sizeof(esb->s_last_error_func));
	esb->s_last_error_time = cpu_to_le32(get_seconds());

	/* The "first error" record is filled only while its time is unset. */
	if (!esb->s_first_error_time) {
		esb->s_first_error_block = esb->s_last_error_block;
		esb->s_first_error_ino = esb->s_last_error_ino;
		esb->s_first_error_line = cpu_to_le32(line);
		strncpy(esb->s_first_error_func, func,
			sizeof(esb->s_first_error_func));
		esb->s_first_error_time = esb->s_last_error_time;
	}

	/*
	 * Start the daily error reporting function if it hasn't been
	 * started already
	 */
	if (!esb->s_error_count)
		mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
	esb->s_error_count = cpu_to_le32(1 + le32_to_cpu(esb->s_error_count));
}
/*
 * Record an error raised at @func:@line via __save_error_info() and then
 * call ext4_commit_super(sb, 1).
 *
 * NOTE(review): the second argument presumably requests a synchronous
 * superblock write - confirm against ext4_commit_super().
 */
static void save_error_info(struct super_block *sb, const char *func,
unsigned int line)
{
__save_error_info(sb, func, line);
ext4_commit_super(sb, 1);
}
/* Deal with the reporting of failure conditions on a filesystem such as /* Deal with the reporting of failure conditions on a filesystem such as
* inconsistencies detected or read IO failures. * inconsistencies detected or read IO failures.
* *
@ -323,11 +359,6 @@ void ext4_journal_abort_handle(const char *caller, const char *err_fn,
static void ext4_handle_error(struct super_block *sb) static void ext4_handle_error(struct super_block *sb)
{ {
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
if (sb->s_flags & MS_RDONLY) if (sb->s_flags & MS_RDONLY)
return; return;
@ -342,19 +373,19 @@ static void ext4_handle_error(struct super_block *sb)
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
sb->s_flags |= MS_RDONLY; sb->s_flags |= MS_RDONLY;
} }
ext4_commit_super(sb, 1);
if (test_opt(sb, ERRORS_PANIC)) if (test_opt(sb, ERRORS_PANIC))
panic("EXT4-fs (device %s): panic forced after error\n", panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id); sb->s_id);
} }
void __ext4_error(struct super_block *sb, const char *function, void __ext4_error(struct super_block *sb, const char *function,
const char *fmt, ...) unsigned int line, const char *fmt, ...)
{ {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ",
sb->s_id, function, line, current->comm);
vprintk(fmt, args); vprintk(fmt, args);
printk("\n"); printk("\n");
va_end(args); va_end(args);
@ -362,14 +393,22 @@ void __ext4_error(struct super_block *sb, const char *function,
ext4_handle_error(sb); ext4_handle_error(sb);
} }
void ext4_error_inode(const char *function, struct inode *inode, void ext4_error_inode(struct inode *inode, const char *function,
unsigned int line, ext4_fsblk_t block,
const char *fmt, ...) const char *fmt, ...)
{ {
va_list args; va_list args;
struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
es->s_last_error_block = cpu_to_le64(block);
save_error_info(inode->i_sb, function, line);
va_start(args, fmt); va_start(args, fmt);
printk(KERN_CRIT "EXT4-fs error (device %s): %s: inode #%lu: (comm %s) ", printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
inode->i_sb->s_id, function, inode->i_ino, current->comm); inode->i_sb->s_id, function, line, inode->i_ino);
if (block)
printk("block %llu: ", block);
printk("comm %s: ", current->comm);
vprintk(fmt, args); vprintk(fmt, args);
printk("\n"); printk("\n");
va_end(args); va_end(args);
@ -377,20 +416,26 @@ void ext4_error_inode(const char *function, struct inode *inode,
ext4_handle_error(inode->i_sb); ext4_handle_error(inode->i_sb);
} }
void ext4_error_file(const char *function, struct file *file, void ext4_error_file(struct file *file, const char *function,
const char *fmt, ...) unsigned int line, const char *fmt, ...)
{ {
va_list args; va_list args;
struct ext4_super_block *es;
struct inode *inode = file->f_dentry->d_inode; struct inode *inode = file->f_dentry->d_inode;
char pathname[80], *path; char pathname[80], *path;
es = EXT4_SB(inode->i_sb)->s_es;
es->s_last_error_ino = cpu_to_le32(inode->i_ino);
save_error_info(inode->i_sb, function, line);
va_start(args, fmt); va_start(args, fmt);
path = d_path(&(file->f_path), pathname, sizeof(pathname)); path = d_path(&(file->f_path), pathname, sizeof(pathname));
if (!path) if (!path)
path = "(unknown)"; path = "(unknown)";
printk(KERN_CRIT printk(KERN_CRIT
"EXT4-fs error (device %s): %s: inode #%lu (comm %s path %s): ", "EXT4-fs error (device %s): %s:%d: inode #%lu "
inode->i_sb->s_id, function, inode->i_ino, current->comm, path); "(comm %s path %s): ",
inode->i_sb->s_id, function, line, inode->i_ino,
current->comm, path);
vprintk(fmt, args); vprintk(fmt, args);
printk("\n"); printk("\n");
va_end(args); va_end(args);
@ -435,7 +480,8 @@ static const char *ext4_decode_error(struct super_block *sb, int errno,
/* __ext4_std_error decodes expected errors from journaling functions /* __ext4_std_error decodes expected errors from journaling functions
* automatically and invokes the appropriate error response. */ * automatically and invokes the appropriate error response. */
void __ext4_std_error(struct super_block *sb, const char *function, int errno) void __ext4_std_error(struct super_block *sb, const char *function,
unsigned int line, int errno)
{ {
char nbuf[16]; char nbuf[16];
const char *errstr; const char *errstr;
@ -448,8 +494,9 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
return; return;
errstr = ext4_decode_error(sb, errno, nbuf); errstr = ext4_decode_error(sb, errno, nbuf);
printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
sb->s_id, function, errstr); sb->s_id, function, line, errstr);
save_error_info(sb, function, line);
ext4_handle_error(sb); ext4_handle_error(sb);
} }
@ -464,29 +511,29 @@ void __ext4_std_error(struct super_block *sb, const char *function, int errno)
* case we take the easy way out and panic immediately. * case we take the easy way out and panic immediately.
*/ */
void ext4_abort(struct super_block *sb, const char *function, void __ext4_abort(struct super_block *sb, const char *function,
const char *fmt, ...) unsigned int line, const char *fmt, ...)
{ {
va_list args; va_list args;
save_error_info(sb, function, line);
va_start(args, fmt); va_start(args, fmt);
printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
function, line);
vprintk(fmt, args); vprintk(fmt, args);
printk("\n"); printk("\n");
va_end(args); va_end(args);
if ((sb->s_flags & MS_RDONLY) == 0) {
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
save_error_info(sb, function, line);
}
if (test_opt(sb, ERRORS_PANIC)) if (test_opt(sb, ERRORS_PANIC))
panic("EXT4-fs panic from previous error\n"); panic("EXT4-fs panic from previous error\n");
if (sb->s_flags & MS_RDONLY)
return;
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
if (EXT4_SB(sb)->s_journal)
jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
} }
void ext4_msg (struct super_block * sb, const char *prefix, void ext4_msg (struct super_block * sb, const char *prefix,
@ -502,38 +549,47 @@ void ext4_msg (struct super_block * sb, const char *prefix,
} }
void __ext4_warning(struct super_block *sb, const char *function, void __ext4_warning(struct super_block *sb, const char *function,
const char *fmt, ...) unsigned int line, const char *fmt, ...)
{ {
va_list args; va_list args;
va_start(args, fmt); va_start(args, fmt);
printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ",
sb->s_id, function); sb->s_id, function, line);
vprintk(fmt, args); vprintk(fmt, args);
printk("\n"); printk("\n");
va_end(args); va_end(args);
} }
void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, void __ext4_grp_locked_error(const char *function, unsigned int line,
const char *function, const char *fmt, ...) struct super_block *sb, ext4_group_t grp,
unsigned long ino, ext4_fsblk_t block,
const char *fmt, ...)
__releases(bitlock) __releases(bitlock)
__acquires(bitlock) __acquires(bitlock)
{ {
va_list args; va_list args;
struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct ext4_super_block *es = EXT4_SB(sb)->s_es;
es->s_last_error_ino = cpu_to_le32(ino);
es->s_last_error_block = cpu_to_le64(block);
__save_error_info(sb, function, line);
va_start(args, fmt); va_start(args, fmt);
printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
sb->s_id, function, line, grp);
if (ino)
printk("inode %lu: ", ino);
if (block)
printk("block %llu:", (unsigned long long) block);
vprintk(fmt, args); vprintk(fmt, args);
printk("\n"); printk("\n");
va_end(args); va_end(args);
if (test_opt(sb, ERRORS_CONT)) { if (test_opt(sb, ERRORS_CONT)) {
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb, 0); ext4_commit_super(sb, 0);
return; return;
} }
ext4_unlock_group(sb, grp); ext4_unlock_group(sb, grp);
ext4_handle_error(sb); ext4_handle_error(sb);
/* /*
@ -660,8 +716,7 @@ static void ext4_put_super(struct super_block *sb)
err = jbd2_journal_destroy(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL; sbi->s_journal = NULL;
if (err < 0) if (err < 0)
ext4_abort(sb, __func__, ext4_abort(sb, "Couldn't clean up the journal");
"Couldn't clean up the journal");
} }
ext4_release_system_zone(sb); ext4_release_system_zone(sb);
@ -946,14 +1001,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",journal_async_commit"); seq_puts(seq, ",journal_async_commit");
else if (test_opt(sb, JOURNAL_CHECKSUM)) else if (test_opt(sb, JOURNAL_CHECKSUM))
seq_puts(seq, ",journal_checksum"); seq_puts(seq, ",journal_checksum");
if (test_opt(sb, NOBH))
seq_puts(seq, ",nobh");
if (test_opt(sb, I_VERSION)) if (test_opt(sb, I_VERSION))
seq_puts(seq, ",i_version"); seq_puts(seq, ",i_version");
if (!test_opt(sb, DELALLOC)) if (!test_opt(sb, DELALLOC) &&
!(def_mount_opts & EXT4_DEFM_NODELALLOC))
seq_puts(seq, ",nodelalloc"); seq_puts(seq, ",nodelalloc");
if (sbi->s_stripe) if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe); seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/* /*
@ -977,7 +1030,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, NO_AUTO_DA_ALLOC)) if (test_opt(sb, NO_AUTO_DA_ALLOC))
seq_puts(seq, ",noauto_da_alloc"); seq_puts(seq, ",noauto_da_alloc");
if (test_opt(sb, DISCARD)) if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD))
seq_puts(seq, ",discard"); seq_puts(seq, ",discard");
if (test_opt(sb, NOLOAD)) if (test_opt(sb, NOLOAD))
@ -986,6 +1039,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, DIOREAD_NOLOCK)) if (test_opt(sb, DIOREAD_NOLOCK))
seq_puts(seq, ",dioread_nolock"); seq_puts(seq, ",dioread_nolock");
if (test_opt(sb, BLOCK_VALIDITY) &&
!(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
seq_puts(seq, ",block_validity");
ext4_show_quota_options(seq, sb); ext4_show_quota_options(seq, sb);
return 0; return 0;
@ -1065,6 +1122,7 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type); static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id, static int ext4_quota_on(struct super_block *sb, int type, int format_id,
char *path); char *path);
static int ext4_quota_off(struct super_block *sb, int type);
static int ext4_quota_on_mount(struct super_block *sb, int type); static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off); size_t len, loff_t off);
@ -1086,7 +1144,7 @@ static const struct dquot_operations ext4_quota_operations = {
static const struct quotactl_ops ext4_qctl_operations = { static const struct quotactl_ops ext4_qctl_operations = {
.quota_on = ext4_quota_on, .quota_on = ext4_quota_on,
.quota_off = dquot_quota_off, .quota_off = ext4_quota_off,
.quota_sync = dquot_quota_sync, .quota_sync = dquot_quota_sync,
.get_info = dquot_get_dqinfo, .get_info = dquot_get_dqinfo,
.set_info = dquot_set_dqinfo, .set_info = dquot_set_dqinfo,
@ -1624,10 +1682,12 @@ set_qf_format:
*n_blocks_count = option; *n_blocks_count = option;
break; break;
case Opt_nobh: case Opt_nobh:
set_opt(sbi->s_mount_opt, NOBH); ext4_msg(sb, KERN_WARNING,
"Ignoring deprecated nobh option");
break; break;
case Opt_bh: case Opt_bh:
clear_opt(sbi->s_mount_opt, NOBH); ext4_msg(sb, KERN_WARNING,
"Ignoring deprecated bh option");
break; break;
case Opt_i_version: case Opt_i_version:
set_opt(sbi->s_mount_opt, I_VERSION); set_opt(sbi->s_mount_opt, I_VERSION);
@ -2249,6 +2309,8 @@ static ssize_t session_write_kbytes_show(struct ext4_attr *a,
{ {
struct super_block *sb = sbi->s_buddy_cache->i_sb; struct super_block *sb = sbi->s_buddy_cache->i_sb;
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%lu\n", return snprintf(buf, PAGE_SIZE, "%lu\n",
(part_stat_read(sb->s_bdev->bd_part, sectors[1]) - (part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
sbi->s_sectors_written_start) >> 1); sbi->s_sectors_written_start) >> 1);
@ -2259,6 +2321,8 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
{ {
struct super_block *sb = sbi->s_buddy_cache->i_sb; struct super_block *sb = sbi->s_buddy_cache->i_sb;
if (!sb->s_bdev->bd_part)
return snprintf(buf, PAGE_SIZE, "0\n");
return snprintf(buf, PAGE_SIZE, "%llu\n", return snprintf(buf, PAGE_SIZE, "%llu\n",
(unsigned long long)(sbi->s_kbytes_written + (unsigned long long)(sbi->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
@ -2431,6 +2495,53 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
return 1; return 1;
} }
/*
 * Timer callback that logs a summary of the error records kept in the
 * ext4 superblock.
 *
 * This function is called once a day if we have errors logged
 * on the file system
 *
 * @arg: the struct super_block pointer, carried as an unsigned long
 *
 * Logs the error count and the first/last error records (each only when
 * its field is non-zero), then re-arms sbi->s_err_report so the callback
 * fires again 24 hours later.
 */
static void print_daily_error_info(unsigned long arg)
{
struct super_block *sb = (struct super_block *) arg;
struct ext4_sb_info *sbi;
struct ext4_super_block *es;
sbi = EXT4_SB(sb);
es = sbi->s_es;
if (es->s_error_count)
ext4_msg(sb, KERN_NOTICE, "error count: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
/*
 * "%.*s" with sizeof() caps the function name at the size of the
 * superblock field; presumably the field is not guaranteed to be
 * NUL-terminated -- TODO confirm against the field's definition.
 */
printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
le32_to_cpu(es->s_first_error_line));
/*
 * The message above has no trailing newline: the optional inode and
 * block fields are appended by the following printk() calls and the
 * newline is emitted last.
 */
if (es->s_first_error_ino)
printk(": inode %u",
le32_to_cpu(es->s_first_error_ino));
if (es->s_first_error_block)
printk(": block %llu", (unsigned long long)
le64_to_cpu(es->s_first_error_block));
printk("\n");
}
if (es->s_last_error_time) {
/* Same layout as the initial-error record, using the s_last_* fields */
printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
le32_to_cpu(es->s_last_error_line));
if (es->s_last_error_ino)
printk(": inode %u",
le32_to_cpu(es->s_last_error_ino));
if (es->s_last_error_block)
printk(": block %llu", (unsigned long long)
le64_to_cpu(es->s_last_error_block));
printk("\n");
}
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent) static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock) __releases(kernel_lock)
__acquires(kernel_lock) __acquires(kernel_lock)
@ -2448,7 +2559,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
struct inode *root; struct inode *root;
char *cp; char *cp;
const char *descr; const char *descr;
int ret = -EINVAL; int ret = -ENOMEM;
int blocksize; int blocksize;
unsigned int db_count; unsigned int db_count;
unsigned int i; unsigned int i;
@ -2459,13 +2570,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi) if (!sbi)
return -ENOMEM; goto out_free_orig;
sbi->s_blockgroup_lock = sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock) { if (!sbi->s_blockgroup_lock) {
kfree(sbi); kfree(sbi);
return -ENOMEM; goto out_free_orig;
} }
sb->s_fs_info = sbi; sb->s_fs_info = sbi;
sbi->s_mount_opt = 0; sbi->s_mount_opt = 0;
@ -2473,8 +2584,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resgid = EXT4_DEF_RESGID; sbi->s_resgid = EXT4_DEF_RESGID;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
sbi->s_sb_block = sb_block; sbi->s_sb_block = sb_block;
sbi->s_sectors_written_start = part_stat_read(sb->s_bdev->bd_part, if (sb->s_bdev->bd_part)
sectors[1]); sbi->s_sectors_written_start =
part_stat_read(sb->s_bdev->bd_part, sectors[1]);
unlock_kernel(); unlock_kernel();
@ -2482,6 +2594,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
for (cp = sb->s_id; (cp = strchr(cp, '/'));) for (cp = sb->s_id; (cp = strchr(cp, '/'));)
*cp = '!'; *cp = '!';
ret = -EINVAL;
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
if (!blocksize) { if (!blocksize) {
ext4_msg(sb, KERN_ERR, "unable to set blocksize"); ext4_msg(sb, KERN_ERR, "unable to set blocksize");
@ -2546,6 +2659,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, ERRORS_CONT); set_opt(sbi->s_mount_opt, ERRORS_CONT);
else else
set_opt(sbi->s_mount_opt, ERRORS_RO); set_opt(sbi->s_mount_opt, ERRORS_RO);
if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)
set_opt(sbi->s_mount_opt, BLOCK_VALIDITY);
if (def_mount_opts & EXT4_DEFM_DISCARD)
set_opt(sbi->s_mount_opt, DISCARD);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@ -2553,15 +2670,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
set_opt(sbi->s_mount_opt, BARRIER); if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
set_opt(sbi->s_mount_opt, BARRIER);
/* /*
* enable delayed allocation by default * enable delayed allocation by default
* Use -o nodelalloc to turn it off * Use -o nodelalloc to turn it off
*/ */
if (!IS_EXT3_SB(sb)) if (!IS_EXT3_SB(sb) &&
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
set_opt(sbi->s_mount_opt, DELALLOC); set_opt(sbi->s_mount_opt, DELALLOC);
if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
&journal_devnum, &journal_ioprio, NULL, 0)) {
ext4_msg(sb, KERN_WARNING,
"failed to parse options in superblock: %s",
sbi->s_es->s_mount_opts);
}
if (!parse_options((char *) data, sb, &journal_devnum, if (!parse_options((char *) data, sb, &journal_devnum,
&journal_ioprio, NULL, 0)) &journal_ioprio, NULL, 0))
goto failed_mount; goto failed_mount;
@ -2912,18 +3037,7 @@ no_journal:
ext4_msg(sb, KERN_ERR, "insufficient memory"); ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount_wq; goto failed_mount_wq;
} }
if (test_opt(sb, NOBH)) {
if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - "
"its supported only with writeback mode");
clear_opt(sbi->s_mount_opt, NOBH);
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
ext4_msg(sb, KERN_WARNING, "dioread_nolock option is "
"not supported with nobh mode");
goto failed_mount_wq;
}
}
EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
if (!EXT4_SB(sb)->dio_unwritten_wq) { if (!EXT4_SB(sb)->dio_unwritten_wq) {
printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@ -3043,7 +3157,14 @@ no_journal:
descr = "out journal"; descr = "out journal";
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
"Opts: %s", descr, orig_data); "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
init_timer(&sbi->s_err_report);
sbi->s_err_report.function = print_daily_error_info;
sbi->s_err_report.data = (unsigned long) sb;
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
lock_kernel(); lock_kernel();
kfree(orig_data); kfree(orig_data);
@ -3093,6 +3214,7 @@ out_fail:
kfree(sbi->s_blockgroup_lock); kfree(sbi->s_blockgroup_lock);
kfree(sbi); kfree(sbi);
lock_kernel(); lock_kernel();
out_free_orig:
kfree(orig_data); kfree(orig_data);
return ret; return ret;
} }
@ -3110,7 +3232,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_min_batch_time = sbi->s_min_batch_time; journal->j_min_batch_time = sbi->s_min_batch_time;
journal->j_max_batch_time = sbi->s_max_batch_time; journal->j_max_batch_time = sbi->s_max_batch_time;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER)) if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER; journal->j_flags |= JBD2_BARRIER;
else else
@ -3119,7 +3241,7 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
else else
journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
static journal_t *ext4_get_journal(struct super_block *sb, static journal_t *ext4_get_journal(struct super_block *sb,
@ -3327,8 +3449,17 @@ static int ext4_load_journal(struct super_block *sb,
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
err = jbd2_journal_wipe(journal, !really_read_only); err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) if (!err) {
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
if (save)
memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
err = jbd2_journal_load(journal); err = jbd2_journal_load(journal);
if (save)
memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN);
kfree(save);
}
if (err) { if (err) {
ext4_msg(sb, KERN_ERR, "error loading journal"); ext4_msg(sb, KERN_ERR, "error loading journal");
@ -3384,10 +3515,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)
*/ */
if (!(sb->s_flags & MS_RDONLY)) if (!(sb->s_flags & MS_RDONLY))
es->s_wtime = cpu_to_le32(get_seconds()); es->s_wtime = cpu_to_le32(get_seconds());
es->s_kbytes_written = if (sb->s_bdev->bd_part)
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) - ((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
EXT4_SB(sb)->s_sectors_written_start) >> 1)); EXT4_SB(sb)->s_sectors_written_start) >> 1));
else
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
ext4_free_blocks_count_set(es, percpu_counter_sum_positive( ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeblocks_counter)); &EXT4_SB(sb)->s_freeblocks_counter));
es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
@ -3491,7 +3626,7 @@ int ext4_force_commit(struct super_block *sb)
journal = EXT4_SB(sb)->s_journal; journal = EXT4_SB(sb)->s_journal;
if (journal) { if (journal) {
vfs_check_frozen(sb, SB_FREEZE_WRITE); vfs_check_frozen(sb, SB_FREEZE_TRANS);
ret = ext4_journal_force_commit(journal); ret = ext4_journal_force_commit(journal);
} }
@ -3616,7 +3751,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
} }
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, __func__, "Abort forced by user"); ext4_abort(sb, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
@ -3981,6 +4116,18 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return err; return err;
} }
/*
 * ext4's quota_off handler.
 *
 * When the DELALLOC mount option is set, sync the filesystem first so
 * that delayed-allocation blocks get allocated, then hand over to
 * dquot_quota_off().
 *
 * @sb:   super block the quota is being turned off on
 * @type: quota type, passed through unchanged to dquot_quota_off()
 *
 * Returns whatever dquot_quota_off() returns.
 */
static int ext4_quota_off(struct super_block *sb, int type)
{
/* Force all delayed allocation blocks to be allocated */
if (test_opt(sb, DELALLOC)) {
/*
 * sync_filesystem() is run with s_umount held for read.
 * NOTE(review): if the quotactl caller already holds s_umount this is
 * a nested read acquisition -- confirm against the caller.
 * The return value of sync_filesystem() is not checked here.
 */
down_read(&sb->s_umount);
sync_filesystem(sb);
up_read(&sb->s_umount);
}
return dquot_quota_off(sb, type);
}
/* Read data from quotafile - avoid pagecache and such because we cannot afford /* Read data from quotafile - avoid pagecache and such because we cannot afford
* acquiring the locks... As quota files are never truncated and quota code * acquiring the locks... As quota files are never truncated and quota code
* itself serializes the operations (and noone else should touch the files) * itself serializes the operations (and noone else should touch the files)
@ -4030,7 +4177,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
int err = 0; int err = 0;
int offset = off & (sb->s_blocksize - 1); int offset = off & (sb->s_blocksize - 1);
int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
struct buffer_head *bh; struct buffer_head *bh;
handle_t *handle = journal_current_handle(); handle_t *handle = journal_current_handle();
@ -4055,24 +4201,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
bh = ext4_bread(handle, inode, blk, 1, &err); bh = ext4_bread(handle, inode, blk, 1, &err);
if (!bh) if (!bh)
goto out; goto out;
if (journal_quota) { err = ext4_journal_get_write_access(handle, bh);
err = ext4_journal_get_write_access(handle, bh); if (err) {
if (err) { brelse(bh);
brelse(bh); goto out;
goto out;
}
} }
lock_buffer(bh); lock_buffer(bh);
memcpy(bh->b_data+offset, data, len); memcpy(bh->b_data+offset, data, len);
flush_dcache_page(bh->b_page); flush_dcache_page(bh->b_page);
unlock_buffer(bh); unlock_buffer(bh);
if (journal_quota) err = ext4_handle_dirty_metadata(handle, NULL, bh);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
else {
/* Always do at least ordered writes for quotas */
err = ext4_jbd2_file_inode(handle, inode);
mark_buffer_dirty(bh);
}
brelse(bh); brelse(bh);
out: out:
if (err) { if (err) {

View File

@ -458,8 +458,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
sb->s_dirt = 1; ext4_handle_dirty_super(handle, sb);
ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh);
} }
} }

View File

@ -118,13 +118,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
void __jbd2_log_wait_for_space(journal_t *journal) void __jbd2_log_wait_for_space(journal_t *journal)
{ {
int nblocks, space_left; int nblocks, space_left;
assert_spin_locked(&journal->j_state_lock); /* assert_spin_locked(&journal->j_state_lock); */
nblocks = jbd_space_needed(journal); nblocks = jbd_space_needed(journal);
while (__jbd2_log_space_left(journal) < nblocks) { while (__jbd2_log_space_left(journal) < nblocks) {
if (journal->j_flags & JBD2_ABORT) if (journal->j_flags & JBD2_ABORT)
return; return;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
mutex_lock(&journal->j_checkpoint_mutex); mutex_lock(&journal->j_checkpoint_mutex);
/* /*
@ -138,7 +138,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
* filesystem, so abort the journal and leave a stack * filesystem, so abort the journal and leave a stack
* trace for forensic evidence. * trace for forensic evidence.
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
nblocks = jbd_space_needed(journal); nblocks = jbd_space_needed(journal);
space_left = __jbd2_log_space_left(journal); space_left = __jbd2_log_space_left(journal);
@ -149,7 +149,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
if (journal->j_committing_transaction) if (journal->j_committing_transaction)
tid = journal->j_committing_transaction->t_tid; tid = journal->j_committing_transaction->t_tid;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
if (chkpt) { if (chkpt) {
jbd2_log_do_checkpoint(journal); jbd2_log_do_checkpoint(journal);
} else if (jbd2_cleanup_journal_tail(journal) == 0) { } else if (jbd2_cleanup_journal_tail(journal) == 0) {
@ -167,7 +167,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
WARN_ON(1); WARN_ON(1);
jbd2_journal_abort(journal, 0); jbd2_journal_abort(journal, 0);
} }
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} else { } else {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
} }
@ -474,7 +474,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* next transaction ID we will write, and where it will * next transaction ID we will write, and where it will
* start. */ * start. */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
transaction = journal->j_checkpoint_transactions; transaction = journal->j_checkpoint_transactions;
if (transaction) { if (transaction) {
@ -496,7 +496,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* If the oldest pinned transaction is at the tail of the log /* If the oldest pinned transaction is at the tail of the log
already then there's not much we can do right now. */ already then there's not much we can do right now. */
if (journal->j_tail_sequence == first_tid) { if (journal->j_tail_sequence == first_tid) {
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 1; return 1;
} }
@ -516,7 +516,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
journal->j_free += freed; journal->j_free += freed;
journal->j_tail_sequence = first_tid; journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr; journal->j_tail = blocknr;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
/* /*
* If there is an external journal, we need to make sure that * If there is an external journal, we need to make sure that
@ -775,7 +775,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_log_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL);
J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(transaction->t_checkpoint_io_list == NULL);
J_ASSERT(transaction->t_updates == 0); J_ASSERT(atomic_read(&transaction->t_updates) == 0);
J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction);

View File

@ -150,11 +150,11 @@ static int journal_submit_commit_record(journal_t *journal,
*/ */
if (ret == -EOPNOTSUPP && barrier_done) { if (ret == -EOPNOTSUPP && barrier_done) {
printk(KERN_WARNING printk(KERN_WARNING
"JBD: barrier-based sync failed on %s - " "JBD2: Disabling barriers on %s, "
"disabling barriers\n", journal->j_devname); "not supported by device\n", journal->j_devname);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER; journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
/* And try again, without the barrier */ /* And try again, without the barrier */
lock_buffer(bh); lock_buffer(bh);
@ -180,11 +180,11 @@ retry:
wait_on_buffer(bh); wait_on_buffer(bh);
if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
printk(KERN_WARNING printk(KERN_WARNING
"JBD2: wait_on_commit_record: sync failed on %s - " "JBD2: %s: disabling barries on %s - not supported "
"disabling barriers\n", journal->j_devname); "by device\n", __func__, journal->j_devname);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_BARRIER; journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
lock_buffer(bh); lock_buffer(bh);
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
@ -400,7 +400,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(1, "JBD: starting commit of transaction %d\n", jbd_debug(1, "JBD: starting commit of transaction %d\n",
commit_transaction->t_tid); commit_transaction->t_tid);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED; commit_transaction->t_state = T_LOCKED;
/* /*
@ -417,23 +417,23 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.run.rs_locked); stats.run.rs_locked);
spin_lock(&commit_transaction->t_handle_lock); spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) { while (atomic_read(&commit_transaction->t_updates)) {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
prepare_to_wait(&journal->j_wait_updates, &wait, prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
if (commit_transaction->t_updates) { if (atomic_read(&commit_transaction->t_updates)) {
spin_unlock(&commit_transaction->t_handle_lock); spin_unlock(&commit_transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&commit_transaction->t_handle_lock); spin_lock(&commit_transaction->t_handle_lock);
} }
finish_wait(&journal->j_wait_updates, &wait); finish_wait(&journal->j_wait_updates, &wait);
} }
spin_unlock(&commit_transaction->t_handle_lock); spin_unlock(&commit_transaction->t_handle_lock);
J_ASSERT (commit_transaction->t_outstanding_credits <= J_ASSERT (atomic_read(&commit_transaction->t_outstanding_credits) <=
journal->j_max_transaction_buffers); journal->j_max_transaction_buffers);
/* /*
@ -497,7 +497,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
start_time = ktime_get(); start_time = ktime_get();
commit_transaction->t_log_start = journal->j_head; commit_transaction->t_log_start = journal->j_head;
wake_up(&journal->j_wait_transaction_locked); wake_up(&journal->j_wait_transaction_locked);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd_debug (3, "JBD: commit phase 2\n"); jbd_debug (3, "JBD: commit phase 2\n");
@ -519,19 +519,20 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* transaction! Now comes the tricky part: we need to write out * transaction! Now comes the tricky part: we need to write out
* metadata. Loop over the transaction's entire buffer list: * metadata. Loop over the transaction's entire buffer list:
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
commit_transaction->t_state = T_COMMIT; commit_transaction->t_state = T_COMMIT;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
trace_jbd2_commit_logging(journal, commit_transaction); trace_jbd2_commit_logging(journal, commit_transaction);
stats.run.rs_logging = jiffies; stats.run.rs_logging = jiffies;
stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing, stats.run.rs_flushing = jbd2_time_diff(stats.run.rs_flushing,
stats.run.rs_logging); stats.run.rs_logging);
stats.run.rs_blocks = commit_transaction->t_outstanding_credits; stats.run.rs_blocks =
atomic_read(&commit_transaction->t_outstanding_credits);
stats.run.rs_blocks_logged = 0; stats.run.rs_blocks_logged = 0;
J_ASSERT(commit_transaction->t_nr_buffers <= J_ASSERT(commit_transaction->t_nr_buffers <=
commit_transaction->t_outstanding_credits); atomic_read(&commit_transaction->t_outstanding_credits));
err = 0; err = 0;
descriptor = NULL; descriptor = NULL;
@ -616,7 +617,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* the free space in the log, but this counter is changed * the free space in the log, but this counter is changed
* by jbd2_journal_next_log_block() also. * by jbd2_journal_next_log_block() also.
*/ */
commit_transaction->t_outstanding_credits--; atomic_dec(&commit_transaction->t_outstanding_credits);
/* Bump b_count to prevent truncate from stumbling over /* Bump b_count to prevent truncate from stumbling over
the shadowed buffer! @@@ This can go if we ever get the shadowed buffer! @@@ This can go if we ever get
@ -977,7 +978,7 @@ restart_loop:
* __jbd2_journal_drop_transaction(). Otherwise we could race with * __jbd2_journal_drop_transaction(). Otherwise we could race with
* other checkpointing code processing the transaction... * other checkpointing code processing the transaction...
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
/* /*
* Now recheck if some buffers did not get attached to the transaction * Now recheck if some buffers did not get attached to the transaction
@ -985,7 +986,7 @@ restart_loop:
*/ */
if (commit_transaction->t_forget) { if (commit_transaction->t_forget) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
goto restart_loop; goto restart_loop;
} }
@ -1003,7 +1004,8 @@ restart_loop:
* File the transaction statistics * File the transaction statistics
*/ */
stats.ts_tid = commit_transaction->t_tid; stats.ts_tid = commit_transaction->t_tid;
stats.run.rs_handle_count = commit_transaction->t_handle_count; stats.run.rs_handle_count =
atomic_read(&commit_transaction->t_handle_count);
trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, trace_jbd2_run_stats(journal->j_fs_dev->bd_dev,
commit_transaction->t_tid, &stats.run); commit_transaction->t_tid, &stats.run);
@ -1037,7 +1039,7 @@ restart_loop:
journal->j_average_commit_time*3) / 4; journal->j_average_commit_time*3) / 4;
else else
journal->j_average_commit_time = commit_time; journal->j_average_commit_time = commit_time;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
if (commit_transaction->t_checkpoint_list == NULL && if (commit_transaction->t_checkpoint_list == NULL &&
commit_transaction->t_checkpoint_io_list == NULL) { commit_transaction->t_checkpoint_io_list == NULL) {

View File

@ -41,6 +41,7 @@
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/backing-dev.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h> #include <trace/events/jbd2.h>
@ -48,8 +49,6 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/page.h> #include <asm/page.h>
EXPORT_SYMBOL(jbd2_journal_start);
EXPORT_SYMBOL(jbd2_journal_restart);
EXPORT_SYMBOL(jbd2_journal_extend); EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop); EXPORT_SYMBOL(jbd2_journal_stop);
EXPORT_SYMBOL(jbd2_journal_lock_updates); EXPORT_SYMBOL(jbd2_journal_lock_updates);
@ -143,7 +142,7 @@ static int kjournald2(void *arg)
/* /*
* And now, wait forever for commit wakeup events. * And now, wait forever for commit wakeup events.
*/ */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
loop: loop:
if (journal->j_flags & JBD2_UNMOUNT) if (journal->j_flags & JBD2_UNMOUNT)
@ -154,10 +153,10 @@ loop:
if (journal->j_commit_sequence != journal->j_commit_request) { if (journal->j_commit_sequence != journal->j_commit_request) {
jbd_debug(1, "OK, requests differ\n"); jbd_debug(1, "OK, requests differ\n");
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer); del_timer_sync(&journal->j_commit_timer);
jbd2_journal_commit_transaction(journal); jbd2_journal_commit_transaction(journal);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
goto loop; goto loop;
} }
@ -169,9 +168,9 @@ loop:
* be already stopped. * be already stopped.
*/ */
jbd_debug(1, "Now suspending kjournald2\n"); jbd_debug(1, "Now suspending kjournald2\n");
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
refrigerator(); refrigerator();
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} else { } else {
/* /*
* We assume on resume that commits are already there, * We assume on resume that commits are already there,
@ -191,9 +190,9 @@ loop:
if (journal->j_flags & JBD2_UNMOUNT) if (journal->j_flags & JBD2_UNMOUNT)
should_sleep = 0; should_sleep = 0;
if (should_sleep) { if (should_sleep) {
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} }
finish_wait(&journal->j_wait_commit, &wait); finish_wait(&journal->j_wait_commit, &wait);
} }
@ -211,7 +210,7 @@ loop:
goto loop; goto loop;
end_loop: end_loop:
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
del_timer_sync(&journal->j_commit_timer); del_timer_sync(&journal->j_commit_timer);
journal->j_task = NULL; journal->j_task = NULL;
wake_up(&journal->j_wait_done_commit); wake_up(&journal->j_wait_done_commit);
@ -234,16 +233,16 @@ static int jbd2_journal_start_thread(journal_t *journal)
static void journal_kill_thread(journal_t *journal) static void journal_kill_thread(journal_t *journal)
{ {
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_UNMOUNT; journal->j_flags |= JBD2_UNMOUNT;
while (journal->j_task) { while (journal->j_task) {
wake_up(&journal->j_wait_commit); wake_up(&journal->j_wait_commit);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_done_commit, journal->j_task == NULL); wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} }
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* /*
@ -310,7 +309,17 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
*/ */
J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); retry_alloc:
new_bh = alloc_buffer_head(GFP_NOFS);
if (!new_bh) {
/*
* Failure is not an option, but __GFP_NOFAIL is going
* away; so we retry ourselves here.
*/
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry_alloc;
}
/* keep subsequent assertions sane */ /* keep subsequent assertions sane */
new_bh->b_state = 0; new_bh->b_state = 0;
init_buffer(new_bh, NULL, NULL); init_buffer(new_bh, NULL, NULL);
@ -442,7 +451,7 @@ int __jbd2_log_space_left(journal_t *journal)
{ {
int left = journal->j_free; int left = journal->j_free;
assert_spin_locked(&journal->j_state_lock); /* assert_spin_locked(&journal->j_state_lock); */
/* /*
* Be pessimistic here about the number of those free blocks which * Be pessimistic here about the number of those free blocks which
@ -487,9 +496,9 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid)
{ {
int ret; int ret;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
ret = __jbd2_log_start_commit(journal, tid); ret = __jbd2_log_start_commit(journal, tid);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} }
@ -508,7 +517,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
transaction_t *transaction = NULL; transaction_t *transaction = NULL;
tid_t tid; tid_t tid;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) { if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
@ -516,12 +525,12 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
transaction = journal->j_committing_transaction; transaction = journal->j_committing_transaction;
if (!transaction) { if (!transaction) {
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
return 0; /* Nothing to retry */ return 0; /* Nothing to retry */
} }
tid = transaction->t_tid; tid = transaction->t_tid;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid); jbd2_log_wait_commit(journal, tid);
return 1; return 1;
} }
@ -535,7 +544,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
{ {
int ret = 0; int ret = 0;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (journal->j_running_transaction) { if (journal->j_running_transaction) {
tid_t tid = journal->j_running_transaction->t_tid; tid_t tid = journal->j_running_transaction->t_tid;
@ -554,7 +563,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
*ptid = journal->j_committing_transaction->t_tid; *ptid = journal->j_committing_transaction->t_tid;
ret = 1; ret = 1;
} }
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} }
@ -566,26 +575,24 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
{ {
int err = 0; int err = 0;
read_lock(&journal->j_state_lock);
#ifdef CONFIG_JBD2_DEBUG #ifdef CONFIG_JBD2_DEBUG
spin_lock(&journal->j_state_lock);
if (!tid_geq(journal->j_commit_request, tid)) { if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_EMERG printk(KERN_EMERG
"%s: error: j_commit_request=%d, tid=%d\n", "%s: error: j_commit_request=%d, tid=%d\n",
__func__, journal->j_commit_request, tid); __func__, journal->j_commit_request, tid);
} }
spin_unlock(&journal->j_state_lock);
#endif #endif
spin_lock(&journal->j_state_lock);
while (tid_gt(tid, journal->j_commit_sequence)) { while (tid_gt(tid, journal->j_commit_sequence)) {
jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
tid, journal->j_commit_sequence); tid, journal->j_commit_sequence);
wake_up(&journal->j_wait_commit); wake_up(&journal->j_wait_commit);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_done_commit, wait_event(journal->j_wait_done_commit,
!tid_gt(tid, journal->j_commit_sequence)); !tid_gt(tid, journal->j_commit_sequence));
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
} }
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
if (unlikely(is_journal_aborted(journal))) { if (unlikely(is_journal_aborted(journal))) {
printk(KERN_EMERG "journal commit I/O error\n"); printk(KERN_EMERG "journal commit I/O error\n");
@ -602,7 +609,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
{ {
unsigned long blocknr; unsigned long blocknr;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
J_ASSERT(journal->j_free > 1); J_ASSERT(journal->j_free > 1);
blocknr = journal->j_head; blocknr = journal->j_head;
@ -610,7 +617,7 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
journal->j_free--; journal->j_free--;
if (journal->j_head == journal->j_last) if (journal->j_head == journal->j_last)
journal->j_head = journal->j_first; journal->j_head = journal->j_first;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return jbd2_journal_bmap(journal, blocknr, retp); return jbd2_journal_bmap(journal, blocknr, retp);
} }
@ -830,7 +837,7 @@ static journal_t * journal_init_common (void)
mutex_init(&journal->j_checkpoint_mutex); mutex_init(&journal->j_checkpoint_mutex);
spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_revoke_lock);
spin_lock_init(&journal->j_list_lock); spin_lock_init(&journal->j_list_lock);
spin_lock_init(&journal->j_state_lock); rwlock_init(&journal->j_state_lock);
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
journal->j_min_batch_time = 0; journal->j_min_batch_time = 0;
@ -1096,14 +1103,14 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
} }
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
journal->j_tail, journal->j_tail_sequence, journal->j_errno); journal->j_tail, journal->j_tail_sequence, journal->j_errno);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(journal->j_tail); sb->s_start = cpu_to_be32(journal->j_tail);
sb->s_errno = cpu_to_be32(journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
BUFFER_TRACE(bh, "marking dirty"); BUFFER_TRACE(bh, "marking dirty");
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
@ -1124,12 +1131,12 @@ out:
* any future commit will have to be careful to update the * any future commit will have to be careful to update the
* superblock again to re-record the true start of the log. */ * superblock again to re-record the true start of the log. */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (sb->s_start) if (sb->s_start)
journal->j_flags &= ~JBD2_FLUSHED; journal->j_flags &= ~JBD2_FLUSHED;
else else
journal->j_flags |= JBD2_FLUSHED; journal->j_flags |= JBD2_FLUSHED;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* /*
@ -1391,13 +1398,9 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat,
int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat,
unsigned long ro, unsigned long incompat) unsigned long ro, unsigned long incompat)
{ {
journal_superblock_t *sb;
if (!compat && !ro && !incompat) if (!compat && !ro && !incompat)
return 1; return 1;
sb = journal->j_superblock;
/* We can support any known requested features iff the /* We can support any known requested features iff the
* superblock is in version 2. Otherwise we fail to support any * superblock is in version 2. Otherwise we fail to support any
* extended sb features. */ * extended sb features. */
@ -1545,7 +1548,7 @@ int jbd2_journal_flush(journal_t *journal)
transaction_t *transaction = NULL; transaction_t *transaction = NULL;
unsigned long old_tail; unsigned long old_tail;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
/* Force everything buffered to the log... */ /* Force everything buffered to the log... */
if (journal->j_running_transaction) { if (journal->j_running_transaction) {
@ -1558,10 +1561,10 @@ int jbd2_journal_flush(journal_t *journal)
if (transaction) { if (transaction) {
tid_t tid = transaction->t_tid; tid_t tid = transaction->t_tid;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd2_log_wait_commit(journal, tid); jbd2_log_wait_commit(journal, tid);
} else { } else {
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* ...and flush everything in the log out to disk. */ /* ...and flush everything in the log out to disk. */
@ -1585,12 +1588,12 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future * the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current * commits of data to the journal will restore the current
* s_start value. */ * s_start value. */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
old_tail = journal->j_tail; old_tail = journal->j_tail;
journal->j_tail = 0; journal->j_tail = 0;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
jbd2_journal_update_superblock(journal, 1); jbd2_journal_update_superblock(journal, 1);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_tail = old_tail; journal->j_tail = old_tail;
J_ASSERT(!journal->j_running_transaction); J_ASSERT(!journal->j_running_transaction);
@ -1598,7 +1601,7 @@ int jbd2_journal_flush(journal_t *journal)
J_ASSERT(!journal->j_checkpoint_transactions); J_ASSERT(!journal->j_checkpoint_transactions);
J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_head == journal->j_tail);
J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 0; return 0;
} }
@ -1617,7 +1620,6 @@ int jbd2_journal_flush(journal_t *journal)
int jbd2_journal_wipe(journal_t *journal, int write) int jbd2_journal_wipe(journal_t *journal, int write)
{ {
journal_superblock_t *sb;
int err = 0; int err = 0;
J_ASSERT (!(journal->j_flags & JBD2_LOADED)); J_ASSERT (!(journal->j_flags & JBD2_LOADED));
@ -1626,8 +1628,6 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (err) if (err)
return err; return err;
sb = journal->j_superblock;
if (!journal->j_tail) if (!journal->j_tail)
goto no_recovery; goto no_recovery;
@ -1665,12 +1665,12 @@ void __jbd2_journal_abort_hard(journal_t *journal)
printk(KERN_ERR "Aborting journal on device %s.\n", printk(KERN_ERR "Aborting journal on device %s.\n",
journal->j_devname); journal->j_devname);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags |= JBD2_ABORT; journal->j_flags |= JBD2_ABORT;
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
if (transaction) if (transaction)
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
/* Soft abort: record the abort error status in the journal superblock, /* Soft abort: record the abort error status in the journal superblock,
@ -1755,12 +1755,12 @@ int jbd2_journal_errno(journal_t *journal)
{ {
int err; int err;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_ABORT) if (journal->j_flags & JBD2_ABORT)
err = -EROFS; err = -EROFS;
else else
err = journal->j_errno; err = journal->j_errno;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
return err; return err;
} }
@ -1775,12 +1775,12 @@ int jbd2_journal_clear_err(journal_t *journal)
{ {
int err = 0; int err = 0;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (journal->j_flags & JBD2_ABORT) if (journal->j_flags & JBD2_ABORT)
err = -EROFS; err = -EROFS;
else else
journal->j_errno = 0; journal->j_errno = 0;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return err; return err;
} }
@ -1793,10 +1793,10 @@ int jbd2_journal_clear_err(journal_t *journal)
*/ */
void jbd2_journal_ack_err(journal_t *journal) void jbd2_journal_ack_err(journal_t *journal)
{ {
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
if (journal->j_errno) if (journal->j_errno)
journal->j_flags |= JBD2_ACK_ERR; journal->j_flags |= JBD2_ACK_ERR;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
int jbd2_journal_blocks_per_page(struct inode *inode) int jbd2_journal_blocks_per_page(struct inode *inode)
@ -2201,8 +2201,6 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
void jbd2_journal_release_jbd_inode(journal_t *journal, void jbd2_journal_release_jbd_inode(journal_t *journal,
struct jbd2_inode *jinode) struct jbd2_inode *jinode)
{ {
int writeout = 0;
if (!journal) if (!journal)
return; return;
restart: restart:
@ -2219,9 +2217,6 @@ restart:
goto restart; goto restart;
} }
/* Do we need to wait for data writeback? */
if (journal->j_committing_transaction == jinode->i_transaction)
writeout = 1;
if (jinode->i_transaction) { if (jinode->i_transaction) {
list_del(&jinode->i_list); list_del(&jinode->i_list);
jinode->i_transaction = NULL; jinode->i_transaction = NULL;

View File

@ -285,12 +285,10 @@ int jbd2_journal_recover(journal_t *journal)
int jbd2_journal_skip_recovery(journal_t *journal) int jbd2_journal_skip_recovery(journal_t *journal)
{ {
int err; int err;
journal_superblock_t * sb;
struct recovery_info info; struct recovery_info info;
memset (&info, 0, sizeof(info)); memset (&info, 0, sizeof(info));
sb = journal->j_superblock;
err = do_one_pass(journal, &info, PASS_SCAN); err = do_one_pass(journal, &info, PASS_SCAN);
@ -299,7 +297,8 @@ int jbd2_journal_skip_recovery(journal_t *journal)
++journal->j_transaction_sequence; ++journal->j_transaction_sequence;
} else { } else {
#ifdef CONFIG_JBD2_DEBUG #ifdef CONFIG_JBD2_DEBUG
int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); int dropped = info.end_transaction -
be32_to_cpu(journal->j_superblock->s_sequence);
#endif #endif
jbd_debug(1, jbd_debug(1,
"JBD: ignoring %d transaction%s from the journal.\n", "JBD: ignoring %d transaction%s from the journal.\n",
@ -365,11 +364,6 @@ static int do_one_pass(journal_t *journal,
int tag_bytes = journal_tag_bytes(journal); int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */ __u32 crc32_sum = ~0; /* Transactional Checksums */
/* Precompute the maximum metadata descriptors in a descriptor block */
int MAX_BLOCKS_PER_DESC;
MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
/ tag_bytes);
/* /*
* First thing is to establish what we expect to find in the log * First thing is to establish what we expect to find in the log
* (in terms of transaction IDs), and where (in terms of log * (in terms of transaction IDs), and where (in terms of log

View File

@ -26,6 +26,8 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
#include <linux/backing-dev.h>
#include <linux/module.h>
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
@ -53,6 +55,9 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
transaction->t_tid = journal->j_transaction_sequence++; transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval; transaction->t_expires = jiffies + journal->j_commit_interval;
spin_lock_init(&transaction->t_handle_lock); spin_lock_init(&transaction->t_handle_lock);
atomic_set(&transaction->t_updates, 0);
atomic_set(&transaction->t_outstanding_credits, 0);
atomic_set(&transaction->t_handle_count, 0);
INIT_LIST_HEAD(&transaction->t_inode_list); INIT_LIST_HEAD(&transaction->t_inode_list);
INIT_LIST_HEAD(&transaction->t_private_list); INIT_LIST_HEAD(&transaction->t_private_list);
@ -83,65 +88,75 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
* transaction's buffer credits. * transaction's buffer credits.
*/ */
static int start_this_handle(journal_t *journal, handle_t *handle) static int start_this_handle(journal_t *journal, handle_t *handle,
int gfp_mask)
{ {
transaction_t *transaction; transaction_t *transaction;
int needed; int needed;
int nblocks = handle->h_buffer_credits; int nblocks = handle->h_buffer_credits;
transaction_t *new_transaction = NULL; transaction_t *new_transaction = NULL;
int ret = 0;
unsigned long ts = jiffies; unsigned long ts = jiffies;
if (nblocks > journal->j_max_transaction_buffers) { if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
current->comm, nblocks, current->comm, nblocks,
journal->j_max_transaction_buffers); journal->j_max_transaction_buffers);
ret = -ENOSPC; return -ENOSPC;
goto out;
} }
alloc_transaction: alloc_transaction:
if (!journal->j_running_transaction) { if (!journal->j_running_transaction) {
new_transaction = kzalloc(sizeof(*new_transaction), new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
GFP_NOFS|__GFP_NOFAIL);
if (!new_transaction) { if (!new_transaction) {
ret = -ENOMEM; /*
goto out; * If __GFP_FS is not present, then we may be
* being called from inside the fs writeback
* layer, so we MUST NOT fail. Since
* __GFP_NOFAIL is going away, we will arrange
* to retry the allocation ourselves.
*/
if ((gfp_mask & __GFP_FS) == 0) {
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto alloc_transaction;
}
return -ENOMEM;
} }
} }
jbd_debug(3, "New handle %p going live.\n", handle); jbd_debug(3, "New handle %p going live.\n", handle);
repeat:
/* /*
* We need to hold j_state_lock until t_updates has been incremented, * We need to hold j_state_lock until t_updates has been incremented,
* for proper journal barrier handling * for proper journal barrier handling
*/ */
spin_lock(&journal->j_state_lock); repeat:
repeat_locked: read_lock(&journal->j_state_lock);
if (is_journal_aborted(journal) || if (is_journal_aborted(journal) ||
(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
ret = -EROFS; kfree(new_transaction);
goto out; return -EROFS;
} }
/* Wait on the journal's transaction barrier if necessary */ /* Wait on the journal's transaction barrier if necessary */
if (journal->j_barrier_count) { if (journal->j_barrier_count) {
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
wait_event(journal->j_wait_transaction_locked, wait_event(journal->j_wait_transaction_locked,
journal->j_barrier_count == 0); journal->j_barrier_count == 0);
goto repeat; goto repeat;
} }
if (!journal->j_running_transaction) { if (!journal->j_running_transaction) {
if (!new_transaction) { read_unlock(&journal->j_state_lock);
spin_unlock(&journal->j_state_lock); if (!new_transaction)
goto alloc_transaction; goto alloc_transaction;
write_lock(&journal->j_state_lock);
if (!journal->j_running_transaction) {
jbd2_get_transaction(journal, new_transaction);
new_transaction = NULL;
} }
jbd2_get_transaction(journal, new_transaction); write_unlock(&journal->j_state_lock);
new_transaction = NULL; goto repeat;
} }
transaction = journal->j_running_transaction; transaction = journal->j_running_transaction;
@ -155,7 +170,7 @@ repeat_locked:
prepare_to_wait(&journal->j_wait_transaction_locked, prepare_to_wait(&journal->j_wait_transaction_locked,
&wait, TASK_UNINTERRUPTIBLE); &wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
schedule(); schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait); finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat; goto repeat;
@ -166,8 +181,8 @@ repeat_locked:
* buffers requested by this operation, we need to stall pending a log * buffers requested by this operation, we need to stall pending a log
* checkpoint to free some more log space. * checkpoint to free some more log space.
*/ */
spin_lock(&transaction->t_handle_lock); needed = atomic_add_return(nblocks,
needed = transaction->t_outstanding_credits + nblocks; &transaction->t_outstanding_credits);
if (needed > journal->j_max_transaction_buffers) { if (needed > journal->j_max_transaction_buffers) {
/* /*
@ -178,11 +193,11 @@ repeat_locked:
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
jbd_debug(2, "Handle %p starting new commit...\n", handle); jbd_debug(2, "Handle %p starting new commit...\n", handle);
spin_unlock(&transaction->t_handle_lock); atomic_sub(nblocks, &transaction->t_outstanding_credits);
prepare_to_wait(&journal->j_wait_transaction_locked, &wait, prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
schedule(); schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait); finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat; goto repeat;
@ -215,35 +230,48 @@ repeat_locked:
*/ */
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
spin_unlock(&transaction->t_handle_lock); atomic_sub(nblocks, &transaction->t_outstanding_credits);
__jbd2_log_wait_for_space(journal); read_unlock(&journal->j_state_lock);
goto repeat_locked; write_lock(&journal->j_state_lock);
if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
__jbd2_log_wait_for_space(journal);
write_unlock(&journal->j_state_lock);
goto repeat;
} }
/* OK, account for the buffers that this operation expects to /* OK, account for the buffers that this operation expects to
* use and add the handle to the running transaction. */ * use and add the handle to the running transaction.
*
if (time_after(transaction->t_start, ts)) { * In order for t_max_wait to be reliable, it must be
* protected by a lock. But doing so will mean that
* start_this_handle() can not be run in parallel on SMP
* systems, which limits our scalability. So we only enable
* it when debugging is enabled. We may want to use a
* separate flag, eventually, so we can enable this
* independently of debugging.
*/
#ifdef CONFIG_JBD2_DEBUG
if (jbd2_journal_enable_debug &&
time_after(transaction->t_start, ts)) {
ts = jbd2_time_diff(ts, transaction->t_start); ts = jbd2_time_diff(ts, transaction->t_start);
spin_lock(&transaction->t_handle_lock);
if (ts > transaction->t_max_wait) if (ts > transaction->t_max_wait)
transaction->t_max_wait = ts; transaction->t_max_wait = ts;
spin_unlock(&transaction->t_handle_lock);
} }
#endif
handle->h_transaction = transaction; handle->h_transaction = transaction;
transaction->t_outstanding_credits += nblocks; atomic_inc(&transaction->t_updates);
transaction->t_updates++; atomic_inc(&transaction->t_handle_count);
transaction->t_handle_count++;
jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
handle, nblocks, transaction->t_outstanding_credits, handle, nblocks,
atomic_read(&transaction->t_outstanding_credits),
__jbd2_log_space_left(journal)); __jbd2_log_space_left(journal));
spin_unlock(&transaction->t_handle_lock); read_unlock(&journal->j_state_lock);
spin_unlock(&journal->j_state_lock);
lock_map_acquire(&handle->h_lockdep_map); lock_map_acquire(&handle->h_lockdep_map);
out: kfree(new_transaction);
if (unlikely(new_transaction)) /* It's usually NULL */ return 0;
kfree(new_transaction);
return ret;
} }
static struct lock_class_key jbd2_handle_key; static struct lock_class_key jbd2_handle_key;
@ -278,7 +306,7 @@ static handle_t *new_handle(int nblocks)
* *
* Return a pointer to a newly allocated handle, or NULL on failure * Return a pointer to a newly allocated handle, or NULL on failure
*/ */
handle_t *jbd2_journal_start(journal_t *journal, int nblocks) handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
{ {
handle_t *handle = journal_current_handle(); handle_t *handle = journal_current_handle();
int err; int err;
@ -298,7 +326,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
current->journal_info = handle; current->journal_info = handle;
err = start_this_handle(journal, handle); err = start_this_handle(journal, handle, gfp_mask);
if (err < 0) { if (err < 0) {
jbd2_free_handle(handle); jbd2_free_handle(handle);
current->journal_info = NULL; current->journal_info = NULL;
@ -308,6 +336,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
out: out:
return handle; return handle;
} }
EXPORT_SYMBOL(jbd2__journal_start);
handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
{
return jbd2__journal_start(journal, nblocks, GFP_NOFS);
}
EXPORT_SYMBOL(jbd2_journal_start);
/** /**
* int jbd2_journal_extend() - extend buffer credits. * int jbd2_journal_extend() - extend buffer credits.
@ -342,7 +379,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
result = 1; result = 1;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
/* Don't extend a locked-down transaction! */ /* Don't extend a locked-down transaction! */
if (handle->h_transaction->t_state != T_RUNNING) { if (handle->h_transaction->t_state != T_RUNNING) {
@ -352,7 +389,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
} }
spin_lock(&transaction->t_handle_lock); spin_lock(&transaction->t_handle_lock);
wanted = transaction->t_outstanding_credits + nblocks; wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
if (wanted > journal->j_max_transaction_buffers) { if (wanted > journal->j_max_transaction_buffers) {
jbd_debug(3, "denied handle %p %d blocks: " jbd_debug(3, "denied handle %p %d blocks: "
@ -367,14 +404,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
} }
handle->h_buffer_credits += nblocks; handle->h_buffer_credits += nblocks;
transaction->t_outstanding_credits += nblocks; atomic_add(nblocks, &transaction->t_outstanding_credits);
result = 0; result = 0;
jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
unlock: unlock:
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
error_out: error_out:
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
out: out:
return result; return result;
} }
@ -394,8 +431,7 @@ out:
* transaction capable of guaranteeing the requested number of * transaction capable of guaranteeing the requested number of
* credits. * credits.
*/ */
int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
int jbd2_journal_restart(handle_t *handle, int nblocks)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal; journal_t *journal = transaction->t_journal;
@ -410,29 +446,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
* First unlink the handle from its current transaction, and start the * First unlink the handle from its current transaction, and start the
* commit on that. * commit on that.
*/ */
J_ASSERT(transaction->t_updates > 0); J_ASSERT(atomic_read(&transaction->t_updates) > 0);
J_ASSERT(journal_current_handle() == handle); J_ASSERT(journal_current_handle() == handle);
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
spin_lock(&transaction->t_handle_lock); spin_lock(&transaction->t_handle_lock);
transaction->t_outstanding_credits -= handle->h_buffer_credits; atomic_sub(handle->h_buffer_credits,
transaction->t_updates--; &transaction->t_outstanding_credits);
if (atomic_dec_and_test(&transaction->t_updates))
if (!transaction->t_updates)
wake_up(&journal->j_wait_updates); wake_up(&journal->j_wait_updates);
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "restarting handle %p\n", handle); jbd_debug(2, "restarting handle %p\n", handle);
__jbd2_log_start_commit(journal, transaction->t_tid); __jbd2_log_start_commit(journal, transaction->t_tid);
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
lock_map_release(&handle->h_lockdep_map); lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks; handle->h_buffer_credits = nblocks;
ret = start_this_handle(journal, handle); ret = start_this_handle(journal, handle, gfp_mask);
return ret; return ret;
} }
EXPORT_SYMBOL(jbd2__journal_restart);
/*
 * Thin wrapper around jbd2__journal_restart(): forwards @handle and @nblocks
 * unchanged and supplies GFP_NOFS as the gfp_mask argument. The return value
 * is whatever jbd2__journal_restart() returns.
 */
int jbd2_journal_restart(handle_t *handle, int nblocks)
{
return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
}
EXPORT_SYMBOL(jbd2_journal_restart);
/** /**
* void jbd2_journal_lock_updates () - establish a transaction barrier. * void jbd2_journal_lock_updates () - establish a transaction barrier.
* @journal: Journal to establish a barrier on. * @journal: Journal to establish a barrier on.
@ -447,7 +489,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
{ {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
++journal->j_barrier_count; ++journal->j_barrier_count;
/* Wait until there are no running updates */ /* Wait until there are no running updates */
@ -458,19 +500,19 @@ void jbd2_journal_lock_updates(journal_t *journal)
break; break;
spin_lock(&transaction->t_handle_lock); spin_lock(&transaction->t_handle_lock);
if (!transaction->t_updates) { if (!atomic_read(&transaction->t_updates)) {
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
break; break;
} }
prepare_to_wait(&journal->j_wait_updates, &wait, prepare_to_wait(&journal->j_wait_updates, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
spin_unlock(&transaction->t_handle_lock); spin_unlock(&transaction->t_handle_lock);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
schedule(); schedule();
finish_wait(&journal->j_wait_updates, &wait); finish_wait(&journal->j_wait_updates, &wait);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
} }
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
/* /*
* We have now established a barrier against other normal updates, but * We have now established a barrier against other normal updates, but
@ -494,9 +536,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
J_ASSERT(journal->j_barrier_count != 0); J_ASSERT(journal->j_barrier_count != 0);
mutex_unlock(&journal->j_barrier); mutex_unlock(&journal->j_barrier);
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
--journal->j_barrier_count; --journal->j_barrier_count;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
wake_up(&journal->j_wait_transaction_locked); wake_up(&journal->j_wait_transaction_locked);
} }
@ -1238,7 +1280,8 @@ int jbd2_journal_stop(handle_t *handle)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal; journal_t *journal = transaction->t_journal;
int err; int err, wait_for_commit = 0;
tid_t tid;
pid_t pid; pid_t pid;
J_ASSERT(journal_current_handle() == handle); J_ASSERT(journal_current_handle() == handle);
@ -1246,7 +1289,7 @@ int jbd2_journal_stop(handle_t *handle)
if (is_handle_aborted(handle)) if (is_handle_aborted(handle))
err = -EIO; err = -EIO;
else { else {
J_ASSERT(transaction->t_updates > 0); J_ASSERT(atomic_read(&transaction->t_updates) > 0);
err = 0; err = 0;
} }
@ -1291,9 +1334,9 @@ int jbd2_journal_stop(handle_t *handle)
journal->j_last_sync_writer = pid; journal->j_last_sync_writer = pid;
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
commit_time = journal->j_average_commit_time; commit_time = journal->j_average_commit_time;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
trans_time = ktime_to_ns(ktime_sub(ktime_get(), trans_time = ktime_to_ns(ktime_sub(ktime_get(),
transaction->t_start_time)); transaction->t_start_time));
@ -1314,14 +1357,8 @@ int jbd2_journal_stop(handle_t *handle)
if (handle->h_sync) if (handle->h_sync)
transaction->t_synchronous_commit = 1; transaction->t_synchronous_commit = 1;
current->journal_info = NULL; current->journal_info = NULL;
spin_lock(&transaction->t_handle_lock); atomic_sub(handle->h_buffer_credits,
transaction->t_outstanding_credits -= handle->h_buffer_credits; &transaction->t_outstanding_credits);
transaction->t_updates--;
if (!transaction->t_updates) {
wake_up(&journal->j_wait_updates);
if (journal->j_barrier_count)
wake_up(&journal->j_wait_transaction_locked);
}
/* /*
* If the handle is marked SYNC, we need to set another commit * If the handle is marked SYNC, we need to set another commit
@ -1330,15 +1367,13 @@ int jbd2_journal_stop(handle_t *handle)
* transaction is too old now. * transaction is too old now.
*/ */
if (handle->h_sync || if (handle->h_sync ||
transaction->t_outstanding_credits > (atomic_read(&transaction->t_outstanding_credits) >
journal->j_max_transaction_buffers || journal->j_max_transaction_buffers) ||
time_after_eq(jiffies, transaction->t_expires)) { time_after_eq(jiffies, transaction->t_expires)) {
/* Do this even for aborted journals: an abort still /* Do this even for aborted journals: an abort still
* completes the commit thread, it just doesn't write * completes the commit thread, it just doesn't write
* anything to disk. */ * anything to disk. */
tid_t tid = transaction->t_tid;
spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "transaction too old, requesting commit for " jbd_debug(2, "transaction too old, requesting commit for "
"handle %p\n", handle); "handle %p\n", handle);
/* This is non-blocking */ /* This is non-blocking */
@ -1349,11 +1384,25 @@ int jbd2_journal_stop(handle_t *handle)
* to wait for the commit to complete. * to wait for the commit to complete.
*/ */
if (handle->h_sync && !(current->flags & PF_MEMALLOC)) if (handle->h_sync && !(current->flags & PF_MEMALLOC))
err = jbd2_log_wait_commit(journal, tid); wait_for_commit = 1;
} else {
spin_unlock(&transaction->t_handle_lock);
} }
/*
* Once we drop t_updates, if it goes to zero the transaction
* could start committing on us and eventually disappear. So
* once we do this, we must not dereference transaction
* pointer again.
*/
tid = transaction->t_tid;
if (atomic_dec_and_test(&transaction->t_updates)) {
wake_up(&journal->j_wait_updates);
if (journal->j_barrier_count)
wake_up(&journal->j_wait_transaction_locked);
}
if (wait_for_commit)
err = jbd2_log_wait_commit(journal, tid);
lock_map_release(&handle->h_lockdep_map); lock_map_release(&handle->h_lockdep_map);
jbd2_free_handle(handle); jbd2_free_handle(handle);
@ -1719,7 +1768,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
goto zap_buffer_unlocked; goto zap_buffer_unlocked;
/* OK, we have data buffer in journaled mode */ /* OK, we have data buffer in journaled mode */
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
@ -1772,7 +1821,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} else { } else {
/* There is no currently-running transaction. So the /* There is no currently-running transaction. So the
@ -1786,7 +1835,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return ret; return ret;
} else { } else {
/* The orphan record's transaction has /* The orphan record's transaction has
@ -1810,7 +1859,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_put_journal_head(jh); jbd2_journal_put_journal_head(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
return 0; return 0;
} else { } else {
/* Good, the buffer belongs to the running transaction. /* Good, the buffer belongs to the running transaction.
@ -1829,7 +1878,7 @@ zap_buffer:
zap_buffer_no_jh: zap_buffer_no_jh:
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
zap_buffer_unlocked: zap_buffer_unlocked:
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh)); J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@ -2136,9 +2185,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
/* Locks are here just to force reading of recent values, it is /* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started * enough that the transaction was not committing before we started
* a transaction adding the inode to orphan list */ * a transaction adding the inode to orphan list */
spin_lock(&journal->j_state_lock); read_lock(&journal->j_state_lock);
commit_trans = journal->j_committing_transaction; commit_trans = journal->j_committing_transaction;
spin_unlock(&journal->j_state_lock); read_unlock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
inode_trans = jinode->i_transaction; inode_trans = jinode->i_transaction;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);

View File

@ -760,13 +760,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
if (osb->osb_commit_interval) if (osb->osb_commit_interval)
commit_interval = osb->osb_commit_interval; commit_interval = osb->osb_commit_interval;
spin_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_commit_interval = commit_interval; journal->j_commit_interval = commit_interval;
if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
journal->j_flags |= JBD2_BARRIER; journal->j_flags |= JBD2_BARRIER;
else else
journal->j_flags &= ~JBD2_BARRIER; journal->j_flags &= ~JBD2_BARRIER;
spin_unlock(&journal->j_state_lock); write_unlock(&journal->j_state_lock);
} }
int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)

View File

@ -601,13 +601,13 @@ struct transaction_s
* Number of outstanding updates running on this transaction * Number of outstanding updates running on this transaction
* [t_handle_lock] * [t_handle_lock]
*/ */
int t_updates; atomic_t t_updates;
/* /*
* Number of buffers reserved for use by all handles in this transaction * Number of buffers reserved for use by all handles in this transaction
* handle but not yet modified. [t_handle_lock] * handle but not yet modified. [t_handle_lock]
*/ */
int t_outstanding_credits; atomic_t t_outstanding_credits;
/* /*
* Forward and backward links for the circular list of all transactions * Forward and backward links for the circular list of all transactions
@ -629,7 +629,7 @@ struct transaction_s
/* /*
* How many handles used this transaction? [t_handle_lock] * How many handles used this transaction? [t_handle_lock]
*/ */
int t_handle_count; atomic_t t_handle_count;
/* /*
* This transaction is being forced and some process is * This transaction is being forced and some process is
@ -764,7 +764,7 @@ struct journal_s
/* /*
* Protect the various scalars in the journal * Protect the various scalars in the journal
*/ */
spinlock_t j_state_lock; rwlock_t j_state_lock;
/* /*
* Number of processes waiting to create a barrier lock [j_state_lock] * Number of processes waiting to create a barrier lock [j_state_lock]
@ -1082,7 +1082,9 @@ static inline handle_t *journal_current_handle(void)
*/ */
extern handle_t *jbd2_journal_start(journal_t *, int nblocks); extern handle_t *jbd2_journal_start(journal_t *, int nblocks);
extern int jbd2_journal_restart (handle_t *, int nblocks); extern handle_t *jbd2__journal_start(journal_t *, int nblocks, int gfp_mask);
extern int jbd2_journal_restart(handle_t *, int nblocks);
extern int jbd2__journal_restart(handle_t *, int nblocks, int gfp_mask);
extern int jbd2_journal_extend (handle_t *, int nblocks); extern int jbd2_journal_extend (handle_t *, int nblocks);
extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
@ -1257,8 +1259,8 @@ static inline int jbd_space_needed(journal_t *journal)
{ {
int nblocks = journal->j_max_transaction_buffers; int nblocks = journal->j_max_transaction_buffers;
if (journal->j_committing_transaction) if (journal->j_committing_transaction)
nblocks += journal->j_committing_transaction-> nblocks += atomic_read(&journal->j_committing_transaction->
t_outstanding_credits; t_outstanding_credits);
return nblocks; return nblocks;
} }

View File

@ -395,11 +395,12 @@ DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_group_pa,
); );
TRACE_EVENT(ext4_mb_release_inode_pa, TRACE_EVENT(ext4_mb_release_inode_pa,
TP_PROTO(struct ext4_allocation_context *ac, TP_PROTO(struct super_block *sb,
struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa, struct ext4_prealloc_space *pa,
unsigned long long block, unsigned int count), unsigned long long block, unsigned int count),
TP_ARGS(ac, pa, block, count), TP_ARGS(sb, ac, pa, block, count),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
@ -410,8 +411,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev; __entry->dev = sb->s_dev;
__entry->ino = ac->ac_inode->i_ino; __entry->ino = (ac && ac->ac_inode) ?
ac->ac_inode->i_ino : 0;
__entry->block = block; __entry->block = block;
__entry->count = count; __entry->count = count;
), ),
@ -422,10 +424,11 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
); );
TRACE_EVENT(ext4_mb_release_group_pa, TRACE_EVENT(ext4_mb_release_group_pa,
TP_PROTO(struct ext4_allocation_context *ac, TP_PROTO(struct super_block *sb,
struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa), struct ext4_prealloc_space *pa),
TP_ARGS(ac, pa), TP_ARGS(sb, ac, pa),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
@ -436,8 +439,9 @@ TRACE_EVENT(ext4_mb_release_group_pa,
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = ac->ac_sb->s_dev; __entry->dev = sb->s_dev;
__entry->ino = ac->ac_inode->i_ino; __entry->ino = (ac && ac->ac_inode) ?
ac->ac_inode->i_ino : 0;
__entry->pa_pstart = pa->pa_pstart; __entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len; __entry->pa_len = pa->pa_len;
), ),