linux/fs/fat/inode.c

1979 lines
51 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/fat/inode.c
*
* Written 1992,1993 by Werner Almesberger
* VFAT extensions by Gordon Chaffee, merged with msdos fs by Henrik Storner
* Rewritten for the constant inumbers support by Al Viro
*
* Fixes:
*
* Max Cohan: Fixed invalid FSINFO offset when info_sector is 0
*/
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/vfs.h>
#include <linux/seq_file.h>
#include <linux/parser.h>
#include <linux/uio.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <asm/unaligned.h>
#include <linux/random.h>
#include <linux/iversion.h>
#include "fat.h"
#ifndef CONFIG_FAT_DEFAULT_IOCHARSET
/* if user don't select VFAT, this is undefined. */
#define CONFIG_FAT_DEFAULT_IOCHARSET ""
#endif
#define KB_IN_SECTORS 2
/* DOS dates from 1980/1/1 through 2107/12/31 */
#define FAT_DATE_MIN (0<<9 | 1<<5 | 1)
#define FAT_DATE_MAX (127<<9 | 12<<5 | 31)
#define FAT_TIME_MAX (23<<11 | 59<<5 | 29)
/*
* A deserialized copy of the on-disk structure laid out in struct
* fat_boot_sector.
*/
struct fat_bios_param_block {
u16 fat_sector_size;
u8 fat_sec_per_clus;
u16 fat_reserved;
u8 fat_fats;
u16 fat_dir_entries;
u16 fat_sectors;
u16 fat_fat_length;
u32 fat_total_sect;
u8 fat16_state;
u32 fat16_vol_id;
u32 fat32_length;
u32 fat32_root_cluster;
u16 fat32_info_sector;
u8 fat32_state;
u32 fat32_vol_id;
};
static int fat_default_codepage = CONFIG_FAT_DEFAULT_CODEPAGE;
static char fat_default_iocharset[] = CONFIG_FAT_DEFAULT_IOCHARSET;
static struct fat_floppy_defaults {
unsigned nr_sectors;
unsigned sec_per_clus;
unsigned dir_entries;
unsigned media;
unsigned fat_length;
} floppy_defaults[] = {
{
.nr_sectors = 160 * KB_IN_SECTORS,
.sec_per_clus = 1,
.dir_entries = 64,
.media = 0xFE,
.fat_length = 1,
},
{
.nr_sectors = 180 * KB_IN_SECTORS,
.sec_per_clus = 1,
.dir_entries = 64,
.media = 0xFC,
.fat_length = 2,
},
{
.nr_sectors = 320 * KB_IN_SECTORS,
.sec_per_clus = 2,
.dir_entries = 112,
.media = 0xFF,
.fat_length = 1,
},
{
.nr_sectors = 360 * KB_IN_SECTORS,
.sec_per_clus = 2,
.dir_entries = 112,
.media = 0xFD,
.fat_length = 2,
},
};
fat: add fat_fallocate operation Implement preallocation via the fallocate syscall on VFAT partitions. This patch is based on an earlier patch of the same name which had some issues detailed below and did not get accepted. Refer https://lkml.org/lkml/2007/12/22/130. a) The preallocated space was not persistent when the FALLOC_FL_KEEP_SIZE flag was set. It will deallocate cluster at evict time. b) There was no need to zero out the clusters when the flag was set Instead of doing an expanding truncate, just allocate clusters and add them to the fat chain. This reduces preallocation time. Compatibility with windows: There are no issues when FALLOC_FL_KEEP_SIZE is not set because it just does an expanding truncate. Thus reading from the preallocated area on windows returns null until data is written to it. When a file with preallocated area using the FALLOC_FL_KEEP_SIZE was written to on windows, the windows driver freed-up the preallocated clusters and allocated new clusters for the new data. The freed up clusters gets reflected in the free space available for the partition which can be seen from the Volume properties. The windows chkdsk tool also does not report any errors on a disk containing files with preallocated space. And there is also no issue using linux fat fsck. because discard preallocated clusters at repair time. Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com> Signed-off-by: Amit Sahrawat <a.sahrawat@samsung.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-20 14:59:41 -08:00
int fat_add_cluster(struct inode *inode)
{
int err, cluster;
err = fat_alloc_clusters(inode, &cluster, 1);
if (err)
return err;
/* FIXME: this cluster should be added after data of this
* cluster is writed */
err = fat_chain_add(inode, cluster, 1);
if (err)
fat_free_clusters(inode, cluster);
return err;
}
static inline int __fat_get_block(struct inode *inode, sector_t iblock,
unsigned long *max_blocks,
struct buffer_head *bh_result, int create)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
unsigned long mapped_blocks;
sector_t phys, last_block;
int err, offset;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false);
if (err)
return err;
if (phys) {
map_bh(bh_result, sb, phys);
*max_blocks = min(mapped_blocks, *max_blocks);
return 0;
}
if (!create)
return 0;
if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)",
MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
return -EIO;
}
last_block = inode->i_blocks >> (sb->s_blocksize_bits - 9);
offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
/*
* allocate a cluster according to the following.
* 1) no more available blocks
* 2) not part of fallocate region
*/
if (!offset && !(iblock < last_block)) {
/* TODO: multiple cluster allocation would be desirable. */
err = fat_add_cluster(inode);
if (err)
return err;
}
/* available blocks on this cluster */
mapped_blocks = sbi->sec_per_clus - offset;
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create, false);
if (err)
return err;
if (!phys) {
fat_fs_error(sb,
"invalid FAT chain (i_pos %lld, last_block %llu)",
MSDOS_I(inode)->i_pos,
(unsigned long long)last_block);
return -EIO;
}
BUG_ON(*max_blocks != mapped_blocks);
set_buffer_new(bh_result);
map_bh(bh_result, sb, phys);
return 0;
}
static int fat_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct super_block *sb = inode->i_sb;
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
int err;
err = __fat_get_block(inode, iblock, &max_blocks, bh_result, create);
if (err)
return err;
bh_result->b_size = max_blocks << sb->s_blocksize_bits;
return 0;
}
static int fat_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
return mpage_writepages(mapping, wbc, fat_get_block);
}
static int fat_read_folio(struct file *file, struct folio *folio)
{
return mpage_read_folio(folio, fat_get_block);
}
static void fat_readahead(struct readahead_control *rac)
{
mpage_readahead(rac, fat_get_block);
}
static void fat_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
if (to > inode->i_size) {
truncate_pagecache(inode, inode->i_size);
fat_truncate_blocks(inode, inode->i_size);
}
}
static int fat_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
struct page **pagep, void **fsdata)
{
int err;
*pagep = NULL;
err = cont_write_begin(file, mapping, pos, len,
pagep, fsdata, fat_get_block,
&MSDOS_I(mapping->host)->mmu_private);
if (err < 0)
fat_write_failed(mapping, pos + len);
return err;
}
static int fat_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *pagep, void *fsdata)
{
struct inode *inode = mapping->host;
int err;
err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
if (err < len)
fat_write_failed(mapping, pos + len);
if (!(err < 0) && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
fat_truncate_time(inode, NULL, S_CTIME|S_MTIME);
MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
mark_inode_dirty(inode);
}
return err;
}
static ssize_t fat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
size_t count = iov_iter_count(iter);
loff_t offset = iocb->ki_pos;
ssize_t ret;
if (iov_iter_rw(iter) == WRITE) {
/*
* FIXME: blockdev_direct_IO() doesn't use ->write_begin(),
* so we need to update the ->mmu_private to block boundary.
*
* But we must fill the remaining area or hole by nul for
* updating ->mmu_private.
*
* Return 0, and fallback to normal buffered write.
*/
loff_t size = offset + count;
if (MSDOS_I(inode)->mmu_private < size)
return 0;
}
/*
* FAT need to use the DIO_LOCKING for avoiding the race
* condition of fat_get_block() and ->truncate().
*/
ret = blockdev_direct_IO(iocb, inode, iter, fat_get_block);
if (ret < 0 && iov_iter_rw(iter) == WRITE)
fat_write_failed(mapping, offset + count);
return ret;
}
static int fat_get_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct super_block *sb = inode->i_sb;
unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
int err;
sector_t bmap;
unsigned long mapped_blocks;
BUG_ON(create != 0);
err = fat_bmap(inode, iblock, &bmap, &mapped_blocks, create, true);
if (err)
return err;
if (bmap) {
map_bh(bh_result, sb, bmap);
max_blocks = min(mapped_blocks, max_blocks);
}
bh_result->b_size = max_blocks << sb->s_blocksize_bits;
return 0;
}
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
}
/*
* fat_block_truncate_page() zeroes out a mapping from file offset `from'
* up to the end of the block which corresponds to `from'.
* This is required during truncate to physically zeroout the tail end
* of that block so it doesn't yield old data if the file is later grown.
* Also, avoid causing failure from fsx for cases of "data past EOF"
*/
int fat_block_truncate_page(struct inode *inode, loff_t from)
{
return block_truncate_page(inode->i_mapping, from, fat_get_block);
}
static const struct address_space_operations fat_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
.read_folio = fat_read_folio,
.readahead = fat_readahead,
.writepages = fat_writepages,
.write_begin = fat_write_begin,
.write_end = fat_write_end,
.direct_IO = fat_direct_IO,
.bmap = _fat_bmap,
.migrate_folio = buffer_migrate_folio,
};
/*
* New FAT inode stuff. We do the following:
* a) i_ino is constant and has nothing with on-disk location.
* b) FAT manages its own cache of directory entries.
* c) *This* cache is indexed by on-disk location.
* d) inode has an associated directory entry, all right, but
* it may be unhashed.
* e) currently entries are stored within struct inode. That should
* change.
* f) we deal with races in the following way:
* 1. readdir() and lookup() do FAT-dir-cache lookup.
* 2. rename() unhashes the F-d-c entry and rehashes it in
* a new place.
* 3. unlink() and rmdir() unhash F-d-c entry.
* 4. fat_write_inode() checks whether the thing is unhashed.
* If it is we silently return. If it isn't we do bread(),
* check if the location is still valid and retry if it
* isn't. Otherwise we do changes.
* 5. Spinlock is used to protect hash/unhash/location check/lookup
* 6. fat_evict_inode() unhashes the F-d-c entry.
* 7. lookup() and readdir() do igrab() if they find a F-d-c entry
* and consider negative result as cache miss.
*/
static void fat_hash_init(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int i;
spin_lock_init(&sbi->inode_hash_lock);
for (i = 0; i < FAT_HASH_SIZE; i++)
INIT_HLIST_HEAD(&sbi->inode_hashtable[i]);
}
static inline unsigned long fat_hash(loff_t i_pos)
{
return hash_32(i_pos, FAT_HASH_BITS);
}
static void dir_hash_init(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int i;
spin_lock_init(&sbi->dir_hash_lock);
for (i = 0; i < FAT_HASH_SIZE; i++)
INIT_HLIST_HEAD(&sbi->dir_hashtable[i]);
}
void fat_attach(struct inode *inode, loff_t i_pos)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
if (inode->i_ino != MSDOS_ROOT_INO) {
struct hlist_head *head = sbi->inode_hashtable
+ fat_hash(i_pos);
spin_lock(&sbi->inode_hash_lock);
MSDOS_I(inode)->i_pos = i_pos;
hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head);
spin_unlock(&sbi->inode_hash_lock);
}
/* If NFS support is enabled, cache the mapping of start cluster
* to directory inode. This is used during reconnection of
* dentries to the filesystem root.
*/
if (S_ISDIR(inode->i_mode) && sbi->options.nfs) {
struct hlist_head *d_head = sbi->dir_hashtable;
d_head += fat_dir_hash(MSDOS_I(inode)->i_logstart);
spin_lock(&sbi->dir_hash_lock);
hlist_add_head(&MSDOS_I(inode)->i_dir_hash, d_head);
spin_unlock(&sbi->dir_hash_lock);
}
}
EXPORT_SYMBOL_GPL(fat_attach);
void fat_detach(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
spin_lock(&sbi->inode_hash_lock);
MSDOS_I(inode)->i_pos = 0;
hlist_del_init(&MSDOS_I(inode)->i_fat_hash);
spin_unlock(&sbi->inode_hash_lock);
if (S_ISDIR(inode->i_mode) && sbi->options.nfs) {
spin_lock(&sbi->dir_hash_lock);
hlist_del_init(&MSDOS_I(inode)->i_dir_hash);
spin_unlock(&sbi->dir_hash_lock);
}
}
EXPORT_SYMBOL_GPL(fat_detach);
struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos);
struct msdos_inode_info *i;
struct inode *inode = NULL;
spin_lock(&sbi->inode_hash_lock);
hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin <peter.senna@gmail.com> Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Cc: Wu Fengguang <fengguang.wu@intel.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Gleb Natapov <gleb@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-27 17:06:00 -08:00
hlist_for_each_entry(i, head, i_fat_hash) {
BUG_ON(i->vfs_inode.i_sb != sb);
if (i->i_pos != i_pos)
continue;
inode = igrab(&i->vfs_inode);
if (inode)
break;
}
spin_unlock(&sbi->inode_hash_lock);
return inode;
}
static int is_exec(unsigned char *extension)
{
unsigned char exe_extensions[] = "EXECOMBAT", *walk;
for (walk = exe_extensions; *walk; walk += 3)
if (!strncmp(extension, walk, 3))
return 1;
return 0;
}
static int fat_calc_dir_size(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int ret, fclus, dclus;
inode->i_size = 0;
if (MSDOS_I(inode)->i_start == 0)
return 0;
ret = fat_get_cluster(inode, FAT_ENT_EOF, &fclus, &dclus);
if (ret < 0)
return ret;
inode->i_size = (fclus + 1) << sbi->cluster_bits;
return 0;
}
static int fat_validate_dir(struct inode *dir)
{
struct super_block *sb = dir->i_sb;
if (dir->i_nlink < 2) {
/* Directory should have "."/".." entries at least. */
fat_fs_error(sb, "corrupted directory (invalid entries)");
return -EIO;
}
if (MSDOS_I(dir)->i_start == 0 ||
MSDOS_I(dir)->i_start == MSDOS_SB(sb)->root_cluster) {
/* Directory should point valid cluster. */
fat_fs_error(sb, "corrupted directory (invalid i_start)");
return -EIO;
}
return 0;
}
/* doesn't deal with root inode */
int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
struct timespec64 mtime;
int error;
MSDOS_I(inode)->i_pos = 0;
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode_inc_iversion(inode);
inode->i_generation = get_random_u32();
if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) {
inode->i_generation &= ~1;
inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
MSDOS_I(inode)->i_start = fat_get_start(sbi, de);
MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start;
error = fat_calc_dir_size(inode);
if (error < 0)
return error;
MSDOS_I(inode)->mmu_private = inode->i_size;
set_nlink(inode, fat_subdirs(inode));
error = fat_validate_dir(inode);
if (error < 0)
return error;
} else { /* not a directory */
inode->i_generation |= 1;
inode->i_mode = fat_make_mode(sbi, de->attr,
((sbi->options.showexec && !is_exec(de->name + 8))
? S_IRUGO|S_IWUGO : S_IRWXUGO));
MSDOS_I(inode)->i_start = fat_get_start(sbi, de);
MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start;
inode->i_size = le32_to_cpu(de->size);
inode->i_op = &fat_file_inode_operations;
inode->i_fop = &fat_file_operations;
inode->i_mapping->a_ops = &fat_aops;
MSDOS_I(inode)->mmu_private = inode->i_size;
}
if (de->attr & ATTR_SYS) {
if (sbi->options.sys_immutable)
inode->i_flags |= S_IMMUTABLE;
}
fat_save_attrs(inode, de->attr);
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
fat_time_fat2unix(sbi, &mtime, de->time, de->date, 0);
inode_set_mtime_to_ts(inode, mtime);
inode_set_ctime_to_ts(inode, mtime);
if (sbi->options.isvfat) {
struct timespec64 atime;
fat_time_fat2unix(sbi, &atime, 0, de->adate, 0);
inode_set_atime_to_ts(inode, atime);
fat_time_fat2unix(sbi, &MSDOS_I(inode)->i_crtime, de->ctime,
de->cdate, de->ctime_cs);
} else
inode_set_atime_to_ts(inode, fat_truncate_atime(sbi, &mtime));
return 0;
}
static inline void fat_lock_build_inode(struct msdos_sb_info *sbi)
{
if (sbi->options.nfs == FAT_NFS_NOSTALE_RO)
mutex_lock(&sbi->nfs_build_inode_lock);
}
static inline void fat_unlock_build_inode(struct msdos_sb_info *sbi)
{
if (sbi->options.nfs == FAT_NFS_NOSTALE_RO)
mutex_unlock(&sbi->nfs_build_inode_lock);
}
struct inode *fat_build_inode(struct super_block *sb,
struct msdos_dir_entry *de, loff_t i_pos)
{
struct inode *inode;
int err;
fat_lock_build_inode(MSDOS_SB(sb));
inode = fat_iget(sb, i_pos);
if (inode)
goto out;
inode = new_inode(sb);
if (!inode) {
inode = ERR_PTR(-ENOMEM);
goto out;
}
inode->i_ino = iunique(sb, MSDOS_ROOT_INO);
inode_set_iversion(inode, 1);
err = fat_fill_inode(inode, de);
if (err) {
iput(inode);
inode = ERR_PTR(err);
goto out;
}
fat_attach(inode, i_pos);
insert_inode_hash(inode);
out:
fat_unlock_build_inode(MSDOS_SB(sb));
return inode;
}
EXPORT_SYMBOL_GPL(fat_build_inode);
fat: add fat_fallocate operation Implement preallocation via the fallocate syscall on VFAT partitions. This patch is based on an earlier patch of the same name which had some issues detailed below and did not get accepted. Refer https://lkml.org/lkml/2007/12/22/130. a) The preallocated space was not persistent when the FALLOC_FL_KEEP_SIZE flag was set. It will deallocate cluster at evict time. b) There was no need to zero out the clusters when the flag was set Instead of doing an expanding truncate, just allocate clusters and add them to the fat chain. This reduces preallocation time. Compatibility with windows: There are no issues when FALLOC_FL_KEEP_SIZE is not set because it just does an expanding truncate. Thus reading from the preallocated area on windows returns null until data is written to it. When a file with preallocated area using the FALLOC_FL_KEEP_SIZE was written to on windows, the windows driver freed-up the preallocated clusters and allocated new clusters for the new data. The freed up clusters gets reflected in the free space available for the partition which can be seen from the Volume properties. The windows chkdsk tool also does not report any errors on a disk containing files with preallocated space. And there is also no issue using linux fat fsck. because discard preallocated clusters at repair time. Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com> Signed-off-by: Amit Sahrawat <a.sahrawat@samsung.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-20 14:59:41 -08:00
static int __fat_write_inode(struct inode *inode, int wait);
static void fat_free_eofblocks(struct inode *inode)
{
/* Release unwritten fallocated blocks on inode eviction. */
if ((inode->i_blocks << 9) >
round_up(MSDOS_I(inode)->mmu_private,
MSDOS_SB(inode->i_sb)->cluster_size)) {
int err;
fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
/* Fallocate results in updating the i_start/iogstart
* for the zero byte file. So, make it return to
* original state during evict and commit it to avoid
* any corruption on the next access to the cluster
* chain for the file.
*/
err = __fat_write_inode(inode, inode_needs_sync(inode));
if (err) {
fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
"update on disk inode for unused "
"fallocated blocks, inode could be "
"corrupted. Please run fsck");
}
}
}
static void fat_evict_inode(struct inode *inode)
{
mm + fs: store shadow entries in page cache Reclaim will be leaving shadow entries in the page cache radix tree upon evicting the real page. As those pages are found from the LRU, an iput() can lead to the inode being freed concurrently. At this point, reclaim must no longer install shadow pages because the inode freeing code needs to ensure the page tree is really empty. Add an address_space flag, AS_EXITING, that the inode freeing code sets under the tree lock before doing the final truncate. Reclaim will check for this flag before installing shadow pages. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Rik van Riel <riel@redhat.com> Reviewed-by: Minchan Kim <minchan@kernel.org> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Bob Liu <bob.liu@oracle.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: Dave Chinner <david@fromorbit.com> Cc: Greg Thelen <gthelen@google.com> Cc: Hugh Dickins <hughd@google.com> Cc: Jan Kara <jack@suse.cz> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Luigi Semenzato <semenzato@google.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Metin Doslu <metin@citusdata.com> Cc: Michel Lespinasse <walken@google.com> Cc: Ozgun Erdogan <ozgun@citusdata.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Roman Gushchin <klamm@yandex-team.ru> Cc: Ryan Mallon <rmallon@gmail.com> Cc: Tejun Heo <tj@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-04-03 14:47:49 -07:00
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
fat: add fat_fallocate operation Implement preallocation via the fallocate syscall on VFAT partitions. This patch is based on an earlier patch of the same name which had some issues detailed below and did not get accepted. Refer https://lkml.org/lkml/2007/12/22/130. a) The preallocated space was not persistent when the FALLOC_FL_KEEP_SIZE flag was set. It will deallocate cluster at evict time. b) There was no need to zero out the clusters when the flag was set Instead of doing an expanding truncate, just allocate clusters and add them to the fat chain. This reduces preallocation time. Compatibility with windows: There are no issues when FALLOC_FL_KEEP_SIZE is not set because it just does an expanding truncate. Thus reading from the preallocated area on windows returns null until data is written to it. When a file with preallocated area using the FALLOC_FL_KEEP_SIZE was written to on windows, the windows driver freed-up the preallocated clusters and allocated new clusters for the new data. The freed up clusters gets reflected in the free space available for the partition which can be seen from the Volume properties. The windows chkdsk tool also does not report any errors on a disk containing files with preallocated space. And there is also no issue using linux fat fsck. because discard preallocated clusters at repair time. Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com> Signed-off-by: Amit Sahrawat <a.sahrawat@samsung.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-01-20 14:59:41 -08:00
} else
fat_free_eofblocks(inode);
invalidate_inode_buffers(inode);
clear_inode(inode);
fat_cache_inval_inode(inode);
fat_detach(inode);
}
static void fat_set_state(struct super_block *sb,
unsigned int set, unsigned int force)
{
struct buffer_head *bh;
struct fat_boot_sector *b;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
/* do not change any thing if mounted read only */
if (sb_rdonly(sb) && !force)
return;
/* do not change state if fs was dirty */
if (sbi->dirty) {
/* warn only on set (mount). */
if (set)
fat_msg(sb, KERN_WARNING, "Volume was not properly "
"unmounted. Some data may be corrupt. "
"Please run fsck.");
return;
}
bh = sb_bread(sb, 0);
if (bh == NULL) {
fat_msg(sb, KERN_ERR, "unable to read boot sector "
"to mark fs as dirty");
return;
}
b = (struct fat_boot_sector *) bh->b_data;
if (is_fat32(sbi)) {
if (set)
b->fat32.state |= FAT_STATE_DIRTY;
else
b->fat32.state &= ~FAT_STATE_DIRTY;
} else /* fat 16 and 12 */ {
if (set)
b->fat16.state |= FAT_STATE_DIRTY;
else
b->fat16.state &= ~FAT_STATE_DIRTY;
}
mark_buffer_dirty(bh);
sync_dirty_buffer(bh);
brelse(bh);
}
static void fat_reset_iocharset(struct fat_mount_options *opts)
{
if (opts->iocharset != fat_default_iocharset) {
/* Note: opts->iocharset can be NULL here */
kfree(opts->iocharset);
opts->iocharset = fat_default_iocharset;
}
}
static void delayed_free(struct rcu_head *p)
{
struct msdos_sb_info *sbi = container_of(p, struct msdos_sb_info, rcu);
unload_nls(sbi->nls_disk);
unload_nls(sbi->nls_io);
fat_reset_iocharset(&sbi->options);
kfree(sbi);
}
static void fat_put_super(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
fat_set_state(sb, 0, 0);
fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-05-31 16:26:12 -07:00
iput(sbi->fsinfo_inode);
iput(sbi->fat_inode);
call_rcu(&sbi->rcu, delayed_free);
}
static struct kmem_cache *fat_inode_cachep;
static struct inode *fat_alloc_inode(struct super_block *sb)
{
struct msdos_inode_info *ei;
ei = alloc_inode_sb(sb, fat_inode_cachep, GFP_NOFS);
if (!ei)
return NULL;
init_rwsem(&ei->truncate_lock);
/* Zeroing to allow iput() even if partial initialized inode. */
ei->mmu_private = 0;
ei->i_start = 0;
ei->i_logstart = 0;
ei->i_attrs = 0;
ei->i_pos = 0;
ei->i_crtime.tv_sec = 0;
ei->i_crtime.tv_nsec = 0;
return &ei->vfs_inode;
}
static void fat_free_inode(struct inode *inode)
{
kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
}
static void init_once(void *foo)
{
struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
spin_lock_init(&ei->cache_lru_lock);
ei->nr_caches = 0;
ei->cache_valid_id = FAT_CACHE_VALID + 1;
INIT_LIST_HEAD(&ei->cache_lru);
INIT_HLIST_NODE(&ei->i_fat_hash);
INIT_HLIST_NODE(&ei->i_dir_hash);
inode_init_once(&ei->vfs_inode);
}
static int __init fat_init_inodecache(void)
{
fat_inode_cachep = kmem_cache_create("fat_inode_cache",
sizeof(struct msdos_inode_info),
0, (SLAB_RECLAIM_ACCOUNT|
2016-01-14 15:18:21 -08:00
SLAB_MEM_SPREAD|SLAB_ACCOUNT),
init_once);
if (fat_inode_cachep == NULL)
return -ENOMEM;
return 0;
}
static void __exit fat_destroy_inodecache(void)
{
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache.
*/
rcu_barrier();
kmem_cache_destroy(fat_inode_cachep);
}
static int fat_remount(struct super_block *sb, int *flags, char *data)
{
bool new_rdonly;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
Rename superblock flags (MS_xyz -> SB_xyz) This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-27 13:05:09 -08:00
*flags |= SB_NODIRATIME | (sbi->options.isvfat ? 0 : SB_NOATIME);
fs: push sync_filesystem() down to the file system's remount_fs() Previously, the no-op "mount -o mount /dev/xxx" operation when the file system is already mounted read-write causes an implied, unconditional syncfs(). This seems pretty stupid, and it's certainly documented or guaraunteed to do this, nor is it particularly useful, except in the case where the file system was mounted rw and is getting remounted read-only. However, it's possible that there might be some file systems that are actually depending on this behavior. In most file systems, it's probably fine to only call sync_filesystem() when transitioning from read-write to read-only, and there are some file systems where this is not needed at all (for example, for a pseudo-filesystem or something like romfs). Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: linux-fsdevel@vger.kernel.org Cc: Christoph Hellwig <hch@infradead.org> Cc: Artem Bityutskiy <dedekind1@gmail.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Evgeniy Dushistov <dushistov@mail.ru> Cc: Jan Kara <jack@suse.cz> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Anders Larsen <al@alarsen.net> Cc: Phillip Lougher <phillip@squashfs.org.uk> Cc: Kees Cook <keescook@chromium.org> Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz> Cc: Petr Vandrovec <petr@vandrovec.name> Cc: xfs@oss.sgi.com Cc: linux-btrfs@vger.kernel.org Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Cc: codalist@coda.cs.cmu.edu Cc: linux-ext4@vger.kernel.org Cc: linux-f2fs-devel@lists.sourceforge.net Cc: fuse-devel@lists.sourceforge.net Cc: cluster-devel@redhat.com Cc: linux-mtd@lists.infradead.org Cc: jfs-discussion@lists.sourceforge.net Cc: linux-nfs@vger.kernel.org Cc: linux-nilfs@vger.kernel.org Cc: linux-ntfs-dev@lists.sourceforge.net Cc: ocfs2-devel@oss.oracle.com Cc: reiserfs-devel@vger.kernel.org
2014-03-13 10:14:33 -04:00
sync_filesystem(sb);
/* make sure we update state on remount. */
Rename superblock flags (MS_xyz -> SB_xyz) This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-27 13:05:09 -08:00
new_rdonly = *flags & SB_RDONLY;
if (new_rdonly != sb_rdonly(sb)) {
if (new_rdonly)
fat_set_state(sb, 0, 0);
else
fat_set_state(sb, 1, 1);
}
return 0;
}
static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
/* If the count of free cluster is still unknown, counts it here. */
if (sbi->free_clusters == -1 || !sbi->free_clus_valid) {
int err = fat_count_free_clusters(dentry->d_sb);
if (err)
return err;
}
buf->f_type = dentry->d_sb->s_magic;
buf->f_bsize = sbi->cluster_size;
buf->f_blocks = sbi->max_cluster - FAT_START_ENT;
buf->f_bfree = sbi->free_clusters;
buf->f_bavail = sbi->free_clusters;
buf->f_fsid = u64_to_fsid(id);
buf->f_namelen =
(sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE;
return 0;
}
static int __fat_write_inode(struct inode *inode, int wait)
{
struct super_block *sb = inode->i_sb;
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
struct msdos_dir_entry *raw_entry;
struct timespec64 mtime;
loff_t i_pos;
sector_t blocknr;
int err, offset;
if (inode->i_ino == MSDOS_ROOT_INO)
return 0;
retry:
i_pos = fat_i_pos_read(sbi, inode);
if (!i_pos)
return 0;
fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset);
bh = sb_bread(sb, blocknr);
if (!bh) {
fat_msg(sb, KERN_ERR, "unable to read inode block "
"for updating (i_pos %lld)", i_pos);
vfat: fix 'sync' mount deadlock due to BKL->lock_super conversion There was another FAT BKL conversion deadlock reported by Bart Trojanowski due to the BKL being used as a recursive lock by FAT, which was missed because it only triggers with 'sync' (or 'dirsync') mounts. The recursion worked for the BKL, but after the conversion to lock_super (which uses a mutex), it just deadlocks. Thanks to Bart for debugging this and testing the fix. The lock debugging information from the original report: ============================================= [ INFO: possible recursive locking detected ] 2.6.27-rc3-bisect-00448-ga7f5aaf #16 --------------------------------------------- mv/4020 is trying to acquire lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 but task is already holding lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 other info that might help us debug this: 3 locks held by mv/4020: #0: (&sb->s_type->i_mutex_key#9/1){--..}, at: [<c01b2336>] do_unlinkat+0x66/0x140 #1: (&sb->s_type->i_mutex_key#9){--..}, at: [<c01b0954>] vfs_unlink+0x84/0x110 #2: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 stack backtrace: Pid: 4020, comm: mv Not tainted 2.6.27-rc3-bisect-00448-ga7f5aaf #16 [<c014e694>] validate_chain+0x984/0xea0 [<c0108d70>] ? native_sched_clock+0x0/0xf0 [<c014ee9c>] __lock_acquire+0x2ec/0x9b0 [<c014f5cf>] lock_acquire+0x6f/0x90 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c044e5fd>] mutex_lock_nested+0xad/0x300 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] lock_super+0x1e/0x20 [<f8b3a700>] fat_write_inode+0x60/0x2b0 [fat] [<c0450878>] ? _spin_unlock_irqrestore+0x48/0x80 [<f8b3a953>] ? fat_sync_inode+0x3/0x20 [fat] [<f8b3a962>] fat_sync_inode+0x12/0x20 [fat] [<f8b37c7e>] fat_remove_entries+0xbe/0x120 [fat] [<f8b422ef>] vfat_unlink+0x5f/0x90 [vfat] [<f8b42290>] ? vfat_unlink+0x0/0x90 [vfat] [<c01b0968>] vfs_unlink+0x98/0x110 [<c01b2400>] do_unlinkat+0x130/0x140 [<c016a8f5>] ? audit_syscall_entry+0x105/0x150 [<c01b253b>] sys_unlinkat+0x3b/0x40 [<c01040d3>] sysenter_do_call+0x12/0x3f ======================= where the deadlock is due to the nesting of lock_super from vfat_unlink to fat_write_inode: - do_unlinkat - vfs_unlink - vfat_unlink * lock_super - fat_remove_entries - fat_sync_inode - fat_write_inode * lock_super and the fix is to simply remove the use of lock_super() in fat_write_inode. The lock_super() there had been just an automatic conversion of the kernel lock to the superblock lock, but no locking was actually needed there, since the code in fat_write_inode already protected all relevant accesses with a spinlock (sbi->inode_hash_lock to be exact). The only code inside the BKL (and thus the superblock lock) was accesses tp local variables or calls to functions that have long been SMP-safe (i.e. sb_bread, mark_buffe_dirty and brlese). Bart reports: "Looks good. I ran 10 parallel processes creating 1M files truncating them, writing to them again and then deleting them. This patch fixes the issue I ran into. Signed-off-by: Bart Trojanowski <bart@jukie.net>" Reported-and-tested-by: Bart Trojanowski <bart@jukie.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-08-20 08:31:19 -07:00
return -EIO;
}
spin_lock(&sbi->inode_hash_lock);
if (i_pos != MSDOS_I(inode)->i_pos) {
spin_unlock(&sbi->inode_hash_lock);
brelse(bh);
goto retry;
}
raw_entry = &((struct msdos_dir_entry *) (bh->b_data))[offset];
if (S_ISDIR(inode->i_mode))
raw_entry->size = 0;
else
raw_entry->size = cpu_to_le32(inode->i_size);
raw_entry->attr = fat_make_attrs(inode);
fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart);
mtime = inode_get_mtime(inode);
fat_time_unix2fat(sbi, &mtime, &raw_entry->time,
&raw_entry->date, NULL);
if (sbi->options.isvfat) {
struct timespec64 ts = inode_get_atime(inode);
__le16 atime;
fat_time_unix2fat(sbi, &ts, &atime, &raw_entry->adate, NULL);
fat_time_unix2fat(sbi, &MSDOS_I(inode)->i_crtime, &raw_entry->ctime,
&raw_entry->cdate, &raw_entry->ctime_cs);
}
spin_unlock(&sbi->inode_hash_lock);
mark_buffer_dirty(bh);
vfat: fix 'sync' mount deadlock due to BKL->lock_super conversion There was another FAT BKL conversion deadlock reported by Bart Trojanowski due to the BKL being used as a recursive lock by FAT, which was missed because it only triggers with 'sync' (or 'dirsync') mounts. The recursion worked for the BKL, but after the conversion to lock_super (which uses a mutex), it just deadlocks. Thanks to Bart for debugging this and testing the fix. The lock debugging information from the original report: ============================================= [ INFO: possible recursive locking detected ] 2.6.27-rc3-bisect-00448-ga7f5aaf #16 --------------------------------------------- mv/4020 is trying to acquire lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 but task is already holding lock: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 other info that might help us debug this: 3 locks held by mv/4020: #0: (&sb->s_type->i_mutex_key#9/1){--..}, at: [<c01b2336>] do_unlinkat+0x66/0x140 #1: (&sb->s_type->i_mutex_key#9){--..}, at: [<c01b0954>] vfs_unlink+0x84/0x110 #2: (&type->s_lock_key#9){--..}, at: [<c01a90fe>] lock_super+0x1e/0x20 stack backtrace: Pid: 4020, comm: mv Not tainted 2.6.27-rc3-bisect-00448-ga7f5aaf #16 [<c014e694>] validate_chain+0x984/0xea0 [<c0108d70>] ? native_sched_clock+0x0/0xf0 [<c014ee9c>] __lock_acquire+0x2ec/0x9b0 [<c014f5cf>] lock_acquire+0x6f/0x90 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c044e5fd>] mutex_lock_nested+0xad/0x300 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] ? lock_super+0x1e/0x20 [<c01a90fe>] lock_super+0x1e/0x20 [<f8b3a700>] fat_write_inode+0x60/0x2b0 [fat] [<c0450878>] ? _spin_unlock_irqrestore+0x48/0x80 [<f8b3a953>] ? fat_sync_inode+0x3/0x20 [fat] [<f8b3a962>] fat_sync_inode+0x12/0x20 [fat] [<f8b37c7e>] fat_remove_entries+0xbe/0x120 [fat] [<f8b422ef>] vfat_unlink+0x5f/0x90 [vfat] [<f8b42290>] ? vfat_unlink+0x0/0x90 [vfat] [<c01b0968>] vfs_unlink+0x98/0x110 [<c01b2400>] do_unlinkat+0x130/0x140 [<c016a8f5>] ? audit_syscall_entry+0x105/0x150 [<c01b253b>] sys_unlinkat+0x3b/0x40 [<c01040d3>] sysenter_do_call+0x12/0x3f ======================= where the deadlock is due to the nesting of lock_super from vfat_unlink to fat_write_inode: - do_unlinkat - vfs_unlink - vfat_unlink * lock_super - fat_remove_entries - fat_sync_inode - fat_write_inode * lock_super and the fix is to simply remove the use of lock_super() in fat_write_inode. The lock_super() there had been just an automatic conversion of the kernel lock to the superblock lock, but no locking was actually needed there, since the code in fat_write_inode already protected all relevant accesses with a spinlock (sbi->inode_hash_lock to be exact). The only code inside the BKL (and thus the superblock lock) was accesses tp local variables or calls to functions that have long been SMP-safe (i.e. sb_bread, mark_buffe_dirty and brlese). Bart reports: "Looks good. I ran 10 parallel processes creating 1M files truncating them, writing to them again and then deleting them. This patch fixes the issue I ran into. Signed-off-by: Bart Trojanowski <bart@jukie.net>" Reported-and-tested-by: Bart Trojanowski <bart@jukie.net> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-08-20 08:31:19 -07:00
err = 0;
if (wait)
err = sync_dirty_buffer(bh);
brelse(bh);
return err;
}
static int fat_write_inode(struct inode *inode, struct writeback_control *wbc)
{
fat: switch to fsinfo_inode Currently FAT file-system maps the VFS "superblock" abstraction to the FSINFO block. The FSINFO block contains non-essential data about the amount of free clusters and the next free cluster. FAT file-system can always find out this information by scanning the FAT table, but having it in the FSINFO block may speed things up sometimes. So FAT file-system relies on the VFS superblock write-out services to make sure the FSINFO block is written out to the media from time to time. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the FAT FSINFO block management from '->write_super()'/'->s_dirt' to 'fsinfo_inode'/'->write_inode'. Now, instead of setting the 's_dirt' flag, we just mark the special 'fsinfo_inode' inode as dirty and let VFS invoke the '->write_inode' call-back when needed, where we write-out the FSINFO block. This patch also makes sure we do not mark the 'fsinfo_inode' inode as dirty if we are not FAT32 (FAT16 and FAT12 do not have the FSINFO block) or if we are in R/O mode. As a bonus, we can also remove the '->sync_fs()' and '->write_super()' FAT call-back function because they become unneeded. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-05-31 16:26:13 -07:00
int err;
if (inode->i_ino == MSDOS_FSINFO_INO) {
struct super_block *sb = inode->i_sb;
mutex_lock(&MSDOS_SB(sb)->s_lock);
fat: switch to fsinfo_inode Currently FAT file-system maps the VFS "superblock" abstraction to the FSINFO block. The FSINFO block contains non-essential data about the amount of free clusters and the next free cluster. FAT file-system can always find out this information by scanning the FAT table, but having it in the FSINFO block may speed things up sometimes. So FAT file-system relies on the VFS superblock write-out services to make sure the FSINFO block is written out to the media from time to time. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the FAT FSINFO block management from '->write_super()'/'->s_dirt' to 'fsinfo_inode'/'->write_inode'. Now, instead of setting the 's_dirt' flag, we just mark the special 'fsinfo_inode' inode as dirty and let VFS invoke the '->write_inode' call-back when needed, where we write-out the FSINFO block. This patch also makes sure we do not mark the 'fsinfo_inode' inode as dirty if we are not FAT32 (FAT16 and FAT12 do not have the FSINFO block) or if we are in R/O mode. As a bonus, we can also remove the '->sync_fs()' and '->write_super()' FAT call-back function because they become unneeded. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-05-31 16:26:13 -07:00
err = fat_clusters_flush(sb);
mutex_unlock(&MSDOS_SB(sb)->s_lock);
fat: switch to fsinfo_inode Currently FAT file-system maps the VFS "superblock" abstraction to the FSINFO block. The FSINFO block contains non-essential data about the amount of free clusters and the next free cluster. FAT file-system can always find out this information by scanning the FAT table, but having it in the FSINFO block may speed things up sometimes. So FAT file-system relies on the VFS superblock write-out services to make sure the FSINFO block is written out to the media from time to time. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the FAT FSINFO block management from '->write_super()'/'->s_dirt' to 'fsinfo_inode'/'->write_inode'. Now, instead of setting the 's_dirt' flag, we just mark the special 'fsinfo_inode' inode as dirty and let VFS invoke the '->write_inode' call-back when needed, where we write-out the FSINFO block. This patch also makes sure we do not mark the 'fsinfo_inode' inode as dirty if we are not FAT32 (FAT16 and FAT12 do not have the FSINFO block) or if we are in R/O mode. As a bonus, we can also remove the '->sync_fs()' and '->write_super()' FAT call-back function because they become unneeded. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-05-31 16:26:13 -07:00
} else
err = __fat_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
return err;
}
int fat_sync_inode(struct inode *inode)
{
return __fat_write_inode(inode, 1);
}
EXPORT_SYMBOL_GPL(fat_sync_inode);
static int fat_show_options(struct seq_file *m, struct dentry *root);
static const struct super_operations fat_sops = {
.alloc_inode = fat_alloc_inode,
.free_inode = fat_free_inode,
.write_inode = fat_write_inode,
.evict_inode = fat_evict_inode,
.put_super = fat_put_super,
.statfs = fat_statfs,
.remount_fs = fat_remount,
.show_options = fat_show_options,
};
static int fat_show_options(struct seq_file *m, struct dentry *root)
{
struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
struct fat_mount_options *opts = &sbi->options;
int isvfat = opts->isvfat;
if (!uid_eq(opts->fs_uid, GLOBAL_ROOT_UID))
seq_printf(m, ",uid=%u",
from_kuid_munged(&init_user_ns, opts->fs_uid));
if (!gid_eq(opts->fs_gid, GLOBAL_ROOT_GID))
seq_printf(m, ",gid=%u",
from_kgid_munged(&init_user_ns, opts->fs_gid));
seq_printf(m, ",fmask=%04o", opts->fs_fmask);
seq_printf(m, ",dmask=%04o", opts->fs_dmask);
if (opts->allow_utime)
seq_printf(m, ",allow_utime=%04o", opts->allow_utime);
if (sbi->nls_disk)
/* strip "cp" prefix from displayed option */
seq_printf(m, ",codepage=%s", &sbi->nls_disk->charset[2]);
if (isvfat) {
if (sbi->nls_io)
seq_printf(m, ",iocharset=%s", sbi->nls_io->charset);
switch (opts->shortname) {
case VFAT_SFN_DISPLAY_WIN95 | VFAT_SFN_CREATE_WIN95:
seq_puts(m, ",shortname=win95");
break;
case VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WINNT:
seq_puts(m, ",shortname=winnt");
break;
case VFAT_SFN_DISPLAY_WINNT | VFAT_SFN_CREATE_WIN95:
seq_puts(m, ",shortname=mixed");
break;
case VFAT_SFN_DISPLAY_LOWER | VFAT_SFN_CREATE_WIN95:
seq_puts(m, ",shortname=lower");
break;
default:
seq_puts(m, ",shortname=unknown");
break;
}
}
if (opts->name_check != 'n')
seq_printf(m, ",check=%c", opts->name_check);
if (opts->usefree)
seq_puts(m, ",usefree");
if (opts->quiet)
seq_puts(m, ",quiet");
if (opts->showexec)
seq_puts(m, ",showexec");
if (opts->sys_immutable)
seq_puts(m, ",sys_immutable");
if (!isvfat) {
if (opts->dotsOK)
seq_puts(m, ",dotsOK=yes");
if (opts->nocase)
seq_puts(m, ",nocase");
} else {
if (opts->utf8)
seq_puts(m, ",utf8");
if (opts->unicode_xlate)
seq_puts(m, ",uni_xlate");
if (!opts->numtail)
seq_puts(m, ",nonumtail");
if (opts->rodir)
seq_puts(m, ",rodir");
}
if (opts->flush)
seq_puts(m, ",flush");
if (opts->tz_set) {
if (opts->time_offset)
seq_printf(m, ",time_offset=%d", opts->time_offset);
else
seq_puts(m, ",tz=UTC");
}
if (opts->errors == FAT_ERRORS_CONT)
seq_puts(m, ",errors=continue");
else if (opts->errors == FAT_ERRORS_PANIC)
seq_puts(m, ",errors=panic");
else
seq_puts(m, ",errors=remount-ro");
if (opts->nfs == FAT_NFS_NOSTALE_RO)
seq_puts(m, ",nfs=nostale_ro");
else if (opts->nfs)
seq_puts(m, ",nfs=stale_rw");
if (opts->discard)
seq_puts(m, ",discard");
if (opts->dos1xfloppy)
seq_puts(m, ",dos1xfloppy");
return 0;
}
enum {
Opt_check_n, Opt_check_r, Opt_check_s, Opt_uid, Opt_gid,
Opt_umask, Opt_dmask, Opt_fmask, Opt_allow_utime, Opt_codepage,
Opt_usefree, Opt_nocase, Opt_quiet, Opt_showexec, Opt_debug,
Opt_immutable, Opt_dots, Opt_nodots,
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err, Opt_dos1xfloppy,
};
static const match_table_t fat_tokens = {
{Opt_check_r, "check=relaxed"},
{Opt_check_s, "check=strict"},
{Opt_check_n, "check=normal"},
{Opt_check_r, "check=r"},
{Opt_check_s, "check=s"},
{Opt_check_n, "check=n"},
{Opt_uid, "uid=%u"},
{Opt_gid, "gid=%u"},
{Opt_umask, "umask=%o"},
{Opt_dmask, "dmask=%o"},
{Opt_fmask, "fmask=%o"},
{Opt_allow_utime, "allow_utime=%o"},
{Opt_codepage, "codepage=%u"},
{Opt_usefree, "usefree"},
{Opt_nocase, "nocase"},
{Opt_quiet, "quiet"},
{Opt_showexec, "showexec"},
{Opt_debug, "debug"},
{Opt_immutable, "sys_immutable"},
{Opt_flush, "flush"},
{Opt_tz_utc, "tz=UTC"},
{Opt_time_offset, "time_offset=%d"},
{Opt_err_cont, "errors=continue"},
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
{Opt_discard, "discard"},
{Opt_nfs_stale_rw, "nfs"},
{Opt_nfs_stale_rw, "nfs=stale_rw"},
{Opt_nfs_nostale_ro, "nfs=nostale_ro"},
{Opt_dos1xfloppy, "dos1xfloppy"},
{Opt_obsolete, "conv=binary"},
{Opt_obsolete, "conv=text"},
{Opt_obsolete, "conv=auto"},
{Opt_obsolete, "conv=b"},
{Opt_obsolete, "conv=t"},
{Opt_obsolete, "conv=a"},
{Opt_obsolete, "fat=%u"},
{Opt_obsolete, "blocksize=%u"},
{Opt_obsolete, "cvf_format=%20s"},
{Opt_obsolete, "cvf_options=%100s"},
{Opt_obsolete, "posix"},
{Opt_err, NULL},
};
static const match_table_t msdos_tokens = {
{Opt_nodots, "nodots"},
{Opt_nodots, "dotsOK=no"},
{Opt_dots, "dots"},
{Opt_dots, "dotsOK=yes"},
{Opt_err, NULL}
};
static const match_table_t vfat_tokens = {
{Opt_charset, "iocharset=%s"},
{Opt_shortname_lower, "shortname=lower"},
{Opt_shortname_win95, "shortname=win95"},
{Opt_shortname_winnt, "shortname=winnt"},
{Opt_shortname_mixed, "shortname=mixed"},
{Opt_utf8_no, "utf8=0"}, /* 0 or no or false */
{Opt_utf8_no, "utf8=no"},
{Opt_utf8_no, "utf8=false"},
{Opt_utf8_yes, "utf8=1"}, /* empty or 1 or yes or true */
{Opt_utf8_yes, "utf8=yes"},
{Opt_utf8_yes, "utf8=true"},
{Opt_utf8_yes, "utf8"},
{Opt_uni_xl_no, "uni_xlate=0"}, /* 0 or no or false */
{Opt_uni_xl_no, "uni_xlate=no"},
{Opt_uni_xl_no, "uni_xlate=false"},
{Opt_uni_xl_yes, "uni_xlate=1"}, /* empty or 1 or yes or true */
{Opt_uni_xl_yes, "uni_xlate=yes"},
{Opt_uni_xl_yes, "uni_xlate=true"},
{Opt_uni_xl_yes, "uni_xlate"},
{Opt_nonumtail_no, "nonumtail=0"}, /* 0 or no or false */
{Opt_nonumtail_no, "nonumtail=no"},
{Opt_nonumtail_no, "nonumtail=false"},
{Opt_nonumtail_yes, "nonumtail=1"}, /* empty or 1 or yes or true */
{Opt_nonumtail_yes, "nonumtail=yes"},
{Opt_nonumtail_yes, "nonumtail=true"},
{Opt_nonumtail_yes, "nonumtail"},
{Opt_rodir, "rodir"},
{Opt_err, NULL}
};
static int parse_options(struct super_block *sb, char *options, int is_vfat,
int silent, int *debug, struct fat_mount_options *opts)
{
char *p;
substring_t args[MAX_OPT_ARGS];
int option;
char *iocharset;
opts->isvfat = is_vfat;
opts->fs_uid = current_uid();
opts->fs_gid = current_gid();
opts->fs_fmask = opts->fs_dmask = current_umask();
opts->allow_utime = -1;
opts->codepage = fat_default_codepage;
fat_reset_iocharset(opts);
if (is_vfat) {
opts->shortname = VFAT_SFN_DISPLAY_WINNT|VFAT_SFN_CREATE_WIN95;
opts->rodir = 0;
} else {
opts->shortname = 0;
opts->rodir = 1;
}
opts->name_check = 'n';
opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
opts->unicode_xlate = 0;
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
opts->tz_set = 0;
opts->nfs = 0;
opts->errors = FAT_ERRORS_RO;
*debug = 0;
opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
if (!options)
goto out;
while ((p = strsep(&options, ",")) != NULL) {
int token;
if (!*p)
continue;
token = match_token(p, fat_tokens, args);
if (token == Opt_err) {
if (is_vfat)
token = match_token(p, vfat_tokens, args);
else
token = match_token(p, msdos_tokens, args);
}
switch (token) {
case Opt_check_s:
opts->name_check = 's';
break;
case Opt_check_r:
opts->name_check = 'r';
break;
case Opt_check_n:
opts->name_check = 'n';
break;
case Opt_usefree:
opts->usefree = 1;
break;
case Opt_nocase:
if (!is_vfat)
opts->nocase = 1;
else {
/* for backward compatibility */
opts->shortname = VFAT_SFN_DISPLAY_WIN95
| VFAT_SFN_CREATE_WIN95;
}
break;
case Opt_quiet:
opts->quiet = 1;
break;
case Opt_showexec:
opts->showexec = 1;
break;
case Opt_debug:
*debug = 1;
break;
case Opt_immutable:
opts->sys_immutable = 1;
break;
case Opt_uid:
if (match_int(&args[0], &option))
return -EINVAL;
opts->fs_uid = make_kuid(current_user_ns(), option);
if (!uid_valid(opts->fs_uid))
return -EINVAL;
break;
case Opt_gid:
if (match_int(&args[0], &option))
return -EINVAL;
opts->fs_gid = make_kgid(current_user_ns(), option);
if (!gid_valid(opts->fs_gid))
return -EINVAL;
break;
case Opt_umask:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->fs_fmask = opts->fs_dmask = option;
break;
case Opt_dmask:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->fs_dmask = option;
break;
case Opt_fmask:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->fs_fmask = option;
break;
case Opt_allow_utime:
if (match_octal(&args[0], &option))
return -EINVAL;
opts->allow_utime = option & (S_IWGRP | S_IWOTH);
break;
case Opt_codepage:
if (match_int(&args[0], &option))
return -EINVAL;
opts->codepage = option;
break;
case Opt_flush:
opts->flush = 1;
break;
case Opt_time_offset:
if (match_int(&args[0], &option))
return -EINVAL;
/*
* GMT+-12 zones may have DST corrections so at least
* 13 hours difference is needed. Make the limit 24
* just in case someone invents something unusual.
*/
if (option < -24 * 60 || option > 24 * 60)
return -EINVAL;
opts->tz_set = 1;
opts->time_offset = option;
break;
fatfs: add UTC timestamp option Provide a new mount option ("tz=UTC") for DOS (vfat/msdos) filesystems, allowing timestamps to be in coordinated universal time (UTC) rather than local time in applications where doing this is advantageous. In particular, portable devices that use fat/vfat (such as digital cameras) can benefit from using UTC in their internal clocks, thus avoiding daylight saving time errors and general time ambiguity issues. The user of the device does not have to worry about changing the time when moving from place or when daylight saving changes. The new mount option, when set, disables the counter-adjustment that Linux currently makes to FAT timestamp info in anticipation of the normal userspace time zone correction. When used in this new mode, all daylight saving time and time zone handling is done in userspace as is normal for many other filesystems (like ext3). The default mode, which remains unchanged, is still appropriate when mounting volumes written in Windows (because of its use of local time). I originally based this patch on one submitted last year by Paul Collins, but I updated it to work with current source and changed variable/option naming. Ogawa Hirofumi (who maintains these filesystems) and I discussed this patch at length on lkml, and he suggested using the option name in the attached version of the patch. Barry Bouwsma pointed out a good addition to the patch as well. Signed-off-by: Joe Peterson <joe@skyrush.com> Signed-off-by: Paul Collins <paul@ondioline.org> Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Barry Bouwsma <free_beer_for_all@yahoo.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 01:46:47 -07:00
case Opt_tz_utc:
opts->tz_set = 1;
opts->time_offset = 0;
fatfs: add UTC timestamp option Provide a new mount option ("tz=UTC") for DOS (vfat/msdos) filesystems, allowing timestamps to be in coordinated universal time (UTC) rather than local time in applications where doing this is advantageous. In particular, portable devices that use fat/vfat (such as digital cameras) can benefit from using UTC in their internal clocks, thus avoiding daylight saving time errors and general time ambiguity issues. The user of the device does not have to worry about changing the time when moving from place or when daylight saving changes. The new mount option, when set, disables the counter-adjustment that Linux currently makes to FAT timestamp info in anticipation of the normal userspace time zone correction. When used in this new mode, all daylight saving time and time zone handling is done in userspace as is normal for many other filesystems (like ext3). The default mode, which remains unchanged, is still appropriate when mounting volumes written in Windows (because of its use of local time). I originally based this patch on one submitted last year by Paul Collins, but I updated it to work with current source and changed variable/option naming. Ogawa Hirofumi (who maintains these filesystems) and I discussed this patch at length on lkml, and he suggested using the option name in the attached version of the patch. Barry Bouwsma pointed out a good addition to the patch as well. Signed-off-by: Joe Peterson <joe@skyrush.com> Signed-off-by: Paul Collins <paul@ondioline.org> Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Barry Bouwsma <free_beer_for_all@yahoo.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 01:46:47 -07:00
break;
case Opt_err_cont:
opts->errors = FAT_ERRORS_CONT;
break;
case Opt_err_panic:
opts->errors = FAT_ERRORS_PANIC;
break;
case Opt_err_ro:
opts->errors = FAT_ERRORS_RO;
break;
case Opt_nfs_stale_rw:
opts->nfs = FAT_NFS_STALE_RW;
break;
case Opt_nfs_nostale_ro:
opts->nfs = FAT_NFS_NOSTALE_RO;
break;
case Opt_dos1xfloppy:
opts->dos1xfloppy = 1;
break;
/* msdos specific */
case Opt_dots:
opts->dotsOK = 1;
break;
case Opt_nodots:
opts->dotsOK = 0;
break;
/* vfat specific */
case Opt_charset:
fat_reset_iocharset(opts);
iocharset = match_strdup(&args[0]);
if (!iocharset)
return -ENOMEM;
opts->iocharset = iocharset;
break;
case Opt_shortname_lower:
opts->shortname = VFAT_SFN_DISPLAY_LOWER
| VFAT_SFN_CREATE_WIN95;
break;
case Opt_shortname_win95:
opts->shortname = VFAT_SFN_DISPLAY_WIN95
| VFAT_SFN_CREATE_WIN95;
break;
case Opt_shortname_winnt:
opts->shortname = VFAT_SFN_DISPLAY_WINNT
| VFAT_SFN_CREATE_WINNT;
break;
case Opt_shortname_mixed:
opts->shortname = VFAT_SFN_DISPLAY_WINNT
| VFAT_SFN_CREATE_WIN95;
break;
case Opt_utf8_no: /* 0 or no or false */
opts->utf8 = 0;
break;
case Opt_utf8_yes: /* empty or 1 or yes or true */
opts->utf8 = 1;
break;
case Opt_uni_xl_no: /* 0 or no or false */
opts->unicode_xlate = 0;
break;
case Opt_uni_xl_yes: /* empty or 1 or yes or true */
opts->unicode_xlate = 1;
break;
case Opt_nonumtail_no: /* 0 or no or false */
opts->numtail = 1; /* negated option */
break;
case Opt_nonumtail_yes: /* empty or 1 or yes or true */
opts->numtail = 0; /* negated option */
break;
case Opt_rodir:
opts->rodir = 1;
break;
case Opt_discard:
opts->discard = 1;
break;
/* obsolete mount options */
case Opt_obsolete:
fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, "
"not supported now", p);
break;
/* unknown option */
default:
if (!silent) {
fat_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" "
"or missing value", p);
}
return -EINVAL;
}
}
out:
/* UTF-8 doesn't provide FAT semantics */
if (!strcmp(opts->iocharset, "utf8")) {
fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset"
" for FAT filesystems, filesystem will be "
"case sensitive!");
}
/* If user doesn't specify allow_utime, it's initialized from dmask. */
if (opts->allow_utime == (unsigned short)-1)
opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
if (opts->unicode_xlate)
opts->utf8 = 0;
if (opts->nfs == FAT_NFS_NOSTALE_RO) {
Rename superblock flags (MS_xyz -> SB_xyz) This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-27 13:05:09 -08:00
sb->s_flags |= SB_RDONLY;
sb->s_export_op = &fat_export_ops_nostale;
}
return 0;
}
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO;
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode_inc_iversion(inode);
inode->i_generation = 0;
inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO);
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
if (is_fat32(sbi)) {
MSDOS_I(inode)->i_start = sbi->root_cluster;
error = fat_calc_dir_size(inode);
if (error < 0)
return error;
} else {
MSDOS_I(inode)->i_start = 0;
inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry);
}
inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
MSDOS_I(inode)->i_logstart = 0;
MSDOS_I(inode)->mmu_private = inode->i_size;
fat_save_attrs(inode, ATTR_DIR);
inode_set_mtime_to_ts(inode,
inode_set_atime_to_ts(inode, inode_set_ctime(inode, 0, 0)));
set_nlink(inode, fat_subdirs(inode)+2);
return 0;
}
static unsigned long calc_fat_clusters(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
/* Divide first to avoid overflow */
if (!is_fat12(sbi)) {
unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits;
return ent_per_sec * sbi->fat_length;
}
return sbi->fat_length * sb->s_blocksize * 8 / sbi->fat_bits;
}
static bool fat_bpb_is_zero(struct fat_boot_sector *b)
{
if (get_unaligned_le16(&b->sector_size))
return false;
if (b->sec_per_clus)
return false;
if (b->reserved)
return false;
if (b->fats)
return false;
if (get_unaligned_le16(&b->dir_entries))
return false;
if (get_unaligned_le16(&b->sectors))
return false;
if (b->media)
return false;
if (b->fat_length)
return false;
if (b->secs_track)
return false;
if (b->heads)
return false;
return true;
}
static int fat_read_bpb(struct super_block *sb, struct fat_boot_sector *b,
int silent, struct fat_bios_param_block *bpb)
{
int error = -EINVAL;
/* Read in BPB ... */
memset(bpb, 0, sizeof(*bpb));
bpb->fat_sector_size = get_unaligned_le16(&b->sector_size);
bpb->fat_sec_per_clus = b->sec_per_clus;
bpb->fat_reserved = le16_to_cpu(b->reserved);
bpb->fat_fats = b->fats;
bpb->fat_dir_entries = get_unaligned_le16(&b->dir_entries);
bpb->fat_sectors = get_unaligned_le16(&b->sectors);
bpb->fat_fat_length = le16_to_cpu(b->fat_length);
bpb->fat_total_sect = le32_to_cpu(b->total_sect);
bpb->fat16_state = b->fat16.state;
bpb->fat16_vol_id = get_unaligned_le32(b->fat16.vol_id);
bpb->fat32_length = le32_to_cpu(b->fat32.length);
bpb->fat32_root_cluster = le32_to_cpu(b->fat32.root_cluster);
bpb->fat32_info_sector = le16_to_cpu(b->fat32.info_sector);
bpb->fat32_state = b->fat32.state;
bpb->fat32_vol_id = get_unaligned_le32(b->fat32.vol_id);
/* Validate this looks like a FAT filesystem BPB */
if (!bpb->fat_reserved) {
if (!silent)
fat_msg(sb, KERN_ERR,
"bogus number of reserved sectors");
goto out;
}
if (!bpb->fat_fats) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus number of FAT structure");
goto out;
}
/*
* Earlier we checked here that b->secs_track and b->head are nonzero,
* but it turns out valid FAT filesystems can have zero there.
*/
if (!fat_valid_media(b->media)) {
if (!silent)
fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)",
(unsigned)b->media);
goto out;
}
if (!is_power_of_2(bpb->fat_sector_size)
|| (bpb->fat_sector_size < 512)
|| (bpb->fat_sector_size > 4096)) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus logical sector size %u",
(unsigned)bpb->fat_sector_size);
goto out;
}
if (!is_power_of_2(bpb->fat_sec_per_clus)) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u",
(unsigned)bpb->fat_sec_per_clus);
goto out;
}
if (bpb->fat_fat_length == 0 && bpb->fat32_length == 0) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus number of FAT sectors");
goto out;
}
error = 0;
out:
return error;
}
static int fat_read_static_bpb(struct super_block *sb,
struct fat_boot_sector *b, int silent,
struct fat_bios_param_block *bpb)
{
static const char *notdos1x = "This doesn't look like a DOS 1.x volume";
sector_t bd_sects = bdev_nr_sectors(sb->s_bdev);
struct fat_floppy_defaults *fdefaults = NULL;
int error = -EINVAL;
unsigned i;
/* 16-bit DOS 1.x reliably wrote bootstrap short-jmp code */
if (b->ignored[0] != 0xeb || b->ignored[2] != 0x90) {
if (!silent)
fat_msg(sb, KERN_ERR,
"%s; no bootstrapping code", notdos1x);
goto out;
}
/*
* If any value in this region is non-zero, it isn't archaic
* DOS.
*/
if (!fat_bpb_is_zero(b)) {
if (!silent)
fat_msg(sb, KERN_ERR,
"%s; DOS 2.x BPB is non-zero", notdos1x);
goto out;
}
for (i = 0; i < ARRAY_SIZE(floppy_defaults); i++) {
if (floppy_defaults[i].nr_sectors == bd_sects) {
fdefaults = &floppy_defaults[i];
break;
}
}
if (fdefaults == NULL) {
if (!silent)
fat_msg(sb, KERN_WARNING,
"This looks like a DOS 1.x volume, but isn't a recognized floppy size (%llu sectors)",
(u64)bd_sects);
goto out;
}
if (!silent)
fat_msg(sb, KERN_INFO,
"This looks like a DOS 1.x volume; assuming default BPB values");
memset(bpb, 0, sizeof(*bpb));
bpb->fat_sector_size = SECTOR_SIZE;
bpb->fat_sec_per_clus = fdefaults->sec_per_clus;
bpb->fat_reserved = 1;
bpb->fat_fats = 2;
bpb->fat_dir_entries = fdefaults->dir_entries;
bpb->fat_sectors = fdefaults->nr_sectors;
bpb->fat_fat_length = fdefaults->fat_length;
error = 0;
out:
return error;
}
/*
* Read the super block of an MS-DOS FS.
*/
int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
void (*setup)(struct super_block *))
{
struct inode *root_inode = NULL, *fat_inode = NULL;
fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-05-31 16:26:12 -07:00
struct inode *fsinfo_inode = NULL;
struct buffer_head *bh;
struct fat_bios_param_block bpb;
struct msdos_sb_info *sbi;
u16 logical_sector_size;
u32 total_sectors, total_clusters, fat_clusters, rootdir_sectors;
int debug;
long error;
char buf[50];
struct timespec64 ts;
/*
* GFP_KERNEL is ok here, because while we do hold the
* superblock lock, memory pressure can't call back into
* the filesystem, since we're only just about to mount
* it and have no inodes etc active!
*/
sbi = kzalloc(sizeof(struct msdos_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
sb->s_fs_info = sbi;
Rename superblock flags (MS_xyz -> SB_xyz) This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-11-27 13:05:09 -08:00
sb->s_flags |= SB_NODIRATIME;
sb->s_magic = MSDOS_SUPER_MAGIC;
sb->s_op = &fat_sops;
sb->s_export_op = &fat_export_ops;
/*
* fat timestamps are complex and truncated by fat itself, so
* we set 1 here to be fast
*/
sb->s_time_gran = 1;
mutex_init(&sbi->nfs_build_inode_lock);
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options);
if (error)
goto out_fail;
setup(sb); /* flavour-specific stuff that needs options */
error = -EIO;
sb_min_blocksize(sb, 512);
bh = sb_bread(sb, 0);
if (bh == NULL) {
fat_msg(sb, KERN_ERR, "unable to read boot sector");
goto out_fail;
}
error = fat_read_bpb(sb, (struct fat_boot_sector *)bh->b_data, silent,
&bpb);
if (error == -EINVAL && sbi->options.dos1xfloppy)
error = fat_read_static_bpb(sb,
(struct fat_boot_sector *)bh->b_data, silent, &bpb);
brelse(bh);
if (error == -EINVAL)
goto out_invalid;
else if (error)
goto out_fail;
logical_sector_size = bpb.fat_sector_size;
sbi->sec_per_clus = bpb.fat_sec_per_clus;
error = -EIO;
if (logical_sector_size < sb->s_blocksize) {
fat_msg(sb, KERN_ERR, "logical sector size too small for device"
" (logical sector size = %u)", logical_sector_size);
goto out_fail;
}
if (logical_sector_size > sb->s_blocksize) {
struct buffer_head *bh_resize;
if (!sb_set_blocksize(sb, logical_sector_size)) {
fat_msg(sb, KERN_ERR, "unable to set blocksize %u",
logical_sector_size);
goto out_fail;
}
/* Verify that the larger boot sector is fully readable */
bh_resize = sb_bread(sb, 0);
if (bh_resize == NULL) {
fat_msg(sb, KERN_ERR, "unable to read boot sector"
" (logical sector size = %lu)",
sb->s_blocksize);
goto out_fail;
}
brelse(bh_resize);
}
mutex_init(&sbi->s_lock);
sbi->cluster_size = sb->s_blocksize * sbi->sec_per_clus;
sbi->cluster_bits = ffs(sbi->cluster_size) - 1;
sbi->fats = bpb.fat_fats;
sbi->fat_bits = 0; /* Don't know yet */
sbi->fat_start = bpb.fat_reserved;
sbi->fat_length = bpb.fat_fat_length;
sbi->root_cluster = 0;
sbi->free_clusters = -1; /* Don't know yet */
sbi->free_clus_valid = 0;
sbi->prev_free = FAT_START_ENT;
sb->s_maxbytes = 0xffffffff;
fat_time_fat2unix(sbi, &ts, 0, cpu_to_le16(FAT_DATE_MIN), 0);
sb->s_time_min = ts.tv_sec;
fat_time_fat2unix(sbi, &ts, cpu_to_le16(FAT_TIME_MAX),
cpu_to_le16(FAT_DATE_MAX), 0);
sb->s_time_max = ts.tv_sec;
if (!sbi->fat_length && bpb.fat32_length) {
struct fat_boot_fsinfo *fsinfo;
struct buffer_head *fsinfo_bh;
/* Must be FAT32 */
sbi->fat_bits = 32;
sbi->fat_length = bpb.fat32_length;
sbi->root_cluster = bpb.fat32_root_cluster;
/* MC - if info_sector is 0, don't multiply by 0 */
sbi->fsinfo_sector = bpb.fat32_info_sector;
if (sbi->fsinfo_sector == 0)
sbi->fsinfo_sector = 1;
fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector);
if (fsinfo_bh == NULL) {
fat_msg(sb, KERN_ERR, "bread failed, FSINFO block"
" (sector = %lu)", sbi->fsinfo_sector);
goto out_fail;
}
fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data;
if (!IS_FSINFO(fsinfo)) {
fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: "
"0x%08x, 0x%08x (sector = %lu)",
le32_to_cpu(fsinfo->signature1),
le32_to_cpu(fsinfo->signature2),
sbi->fsinfo_sector);
} else {
if (sbi->options.usefree)
sbi->free_clus_valid = 1;
sbi->free_clusters = le32_to_cpu(fsinfo->free_clusters);
sbi->prev_free = le32_to_cpu(fsinfo->next_cluster);
}
brelse(fsinfo_bh);
}
/* interpret volume ID as a little endian 32 bit integer */
if (is_fat32(sbi))
sbi->vol_id = bpb.fat32_vol_id;
else /* fat 16 or 12 */
sbi->vol_id = bpb.fat16_vol_id;
sbi->dir_per_block = sb->s_blocksize / sizeof(struct msdos_dir_entry);
sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1;
sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length;
sbi->dir_entries = bpb.fat_dir_entries;
if (sbi->dir_entries & (sbi->dir_per_block - 1)) {
if (!silent)
fat_msg(sb, KERN_ERR, "bogus number of directory entries"
" (%u)", sbi->dir_entries);
goto out_invalid;
}
rootdir_sectors = sbi->dir_entries
* sizeof(struct msdos_dir_entry) / sb->s_blocksize;
sbi->data_start = sbi->dir_start + rootdir_sectors;
total_sectors = bpb.fat_sectors;
if (total_sectors == 0)
total_sectors = bpb.fat_total_sect;
total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus;
if (!is_fat32(sbi))
sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12;
/* some OSes set FAT_STATE_DIRTY and clean it on unmount. */
if (is_fat32(sbi))
sbi->dirty = bpb.fat32_state & FAT_STATE_DIRTY;
else /* fat 16 or 12 */
sbi->dirty = bpb.fat16_state & FAT_STATE_DIRTY;
/* check that FAT table does not overflow */
fat_clusters = calc_fat_clusters(sb);
total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT);
if (total_clusters > max_fat(sb)) {
if (!silent)
fat_msg(sb, KERN_ERR, "count of clusters too big (%u)",
total_clusters);
goto out_invalid;
}
sbi->max_cluster = total_clusters + FAT_START_ENT;
/* check the free_clusters, it's not necessarily correct */
if (sbi->free_clusters != -1 && sbi->free_clusters > total_clusters)
sbi->free_clusters = -1;
/* check the prev_free, it's not necessarily correct */
sbi->prev_free %= sbi->max_cluster;
if (sbi->prev_free < FAT_START_ENT)
sbi->prev_free = FAT_START_ENT;
/* set up enough so that it can read an inode */
fat_hash_init(sb);
dir_hash_init(sb);
fat_ent_access_init(sb);
/*
* The low byte of the first FAT entry must have the same value as
* the media field of the boot sector. But in real world, too many
* devices are writing wrong values. So, removed that validity check.
*
* The removed check compared the first FAT entry to a value dependent
* on the media field like this:
* == (0x0F00 | media), for FAT12
* == (0XFF00 | media), for FAT16
* == (0x0FFFFF | media), for FAT32
*/
error = -EINVAL;
sprintf(buf, "cp%d", sbi->options.codepage);
sbi->nls_disk = load_nls(buf);
if (!sbi->nls_disk) {
fat_msg(sb, KERN_ERR, "codepage %s not found", buf);
goto out_fail;
}
/* FIXME: utf8 is using iocharset for upper/lower conversion */
if (sbi->options.isvfat) {
sbi->nls_io = load_nls(sbi->options.iocharset);
if (!sbi->nls_io) {
fat_msg(sb, KERN_ERR, "IO charset %s not found",
sbi->options.iocharset);
goto out_fail;
}
}
error = -ENOMEM;
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
sbi->fat_inode = fat_inode;
fat: introduce special inode for managing the FSINFO block This is patchset makes fatfs stop using the VFS '->write_super()' method for writing out the FSINFO block. The final goal is to get rid of the 'sync_supers()' kernel thread. This kernel thread wakes up every 5 seconds (by default) and calls '->write_super()' for all mounted file-systems. And the bad thing is that this is done even if all the superblocks are clean. Moreover, some file-systems do not even need this end they do not register the '->write_super()' method at all (e.g., btrfs). So 'sync_supers()' most often just generates useless wake-ups and wastes power. I am trying to make all file-systems independent of '->write_super()' and plan to remove 'sync_supers()' and '->write_super' completely once there are no more users. The '->write_supers()' method is mostly used by baroque file-systems like hfs, udf, etc. Modern file-systems like btrfs and xfs do not use it. This justifies removing this stuff from VFS completely and make every FS self-manage own superblock. Tested with xfstests. This patch: Preparation for further changes. It introduces a special inode ('fsinfo_inode') in FAT file-system which we'll later use for managing the FSINFO block. Note, this there is already one special inode ('fat_inode') which is used for managing the FAT tables. Introduce new 'MSDOS_FSINFO_INO' constant for this special inode. It is safe to do because FAT file-system does not store inode numbers on the media but generates them run-time. I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant, while on it. Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-05-31 16:26:12 -07:00
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
root_inode = new_inode(sb);
if (!root_inode)
goto out_fail;
root_inode->i_ino = MSDOS_ROOT_INO;
inode_set_iversion(root_inode, 1);
error = fat_read_root(root_inode);
if (error < 0) {
iput(root_inode);
goto out_fail;
}
error = -ENOMEM;
insert_inode_hash(root_inode);
fat_attach(root_inode, 0);
sb->s_root = d_make_root(root_inode);
if (!sb->s_root) {
fat_msg(sb, KERN_ERR, "get root inode failed");
goto out_fail;
}
if (sbi->options.discard && !bdev_max_discard_sectors(sb->s_bdev))
fat_msg(sb, KERN_WARNING,
"mounting with \"discard\" option, but the device does not support discard");
fat_set_state(sb, 1, 0);
return 0;
out_invalid:
error = -EINVAL;
if (!silent)
fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
out_fail:
iput(fsinfo_inode);
iput(fat_inode);
unload_nls(sbi->nls_io);
unload_nls(sbi->nls_disk);
fat_reset_iocharset(&sbi->options);
sb->s_fs_info = NULL;
kfree(sbi);
return error;
}
EXPORT_SYMBOL_GPL(fat_fill_super);
/*
* helper function for fat_flush_inodes. This writes both the inode
* and the file data blocks, waiting for in flight data blocks before
* the start of the call. It does not wait for any io started
* during the call
*/
static int writeback_inode(struct inode *inode)
{
int ret;
/* if we used wait=1, sync_inode_metadata waits for the io for the
* inode to finish. So wait=0 is sent down to sync_inode_metadata
* and filemap_fdatawrite is used for the data blocks
*/
ret = sync_inode_metadata(inode, 0);
if (!ret)
ret = filemap_fdatawrite(inode->i_mapping);
return ret;
}
/*
* write data and metadata corresponding to i1 and i2. The io is
* started but we do not wait for any of it to finish.
*
* filemap_flush is used for the block device, so if there is a dirty
* page for a block already in flight, we will not wait and start the
* io over again
*/
int fat_flush_inodes(struct super_block *sb, struct inode *i1, struct inode *i2)
{
int ret = 0;
if (!MSDOS_SB(sb)->options.flush)
return 0;
if (i1)
ret = writeback_inode(i1);
if (!ret && i2)
ret = writeback_inode(i2);
if (!ret)
ret = sync_blockdev_nowait(sb->s_bdev);
return ret;
}
EXPORT_SYMBOL_GPL(fat_flush_inodes);
static int __init init_fat_fs(void)
{
int err;
err = fat_cache_init();
if (err)
return err;
err = fat_init_inodecache();
if (err)
goto failed;
return 0;
failed:
fat_cache_destroy();
return err;
}
static void __exit exit_fat_fs(void)
{
fat_cache_destroy();
fat_destroy_inodecache();
}
module_init(init_fat_fs)
module_exit(exit_fat_fs)
MODULE_LICENSE("GPL");