/* SPDX-License-Identifier: GPL-2.0-only */
/*
* This file is part of UBIFS.
*
* Copyright (C) 2006-2008 Nokia Corporation
*
* Authors: Artem Bityutskiy (Битюцкий Артём)
* Adrian Hunter
*/
#ifndef __UBIFS_H__
#define __UBIFS_H__
#include <asm/div64.h>
#include <linux/statfs.h>
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/mtd/ubi.h>
#include <linux/pagemap.h>
#include <linux/backing-dev.h>
#include <linux/security.h>
#include <linux/xattr.h>
#include <linux/random.h>
#include <linux/sysfs.h>
#include <linux/completion.h>
#include <crypto/hash_info.h>
#include <crypto/hash.h>
#include <crypto/algapi.h>
#include <linux/fscrypt.h>
#include "ubifs-media.h"
/* Version of this UBIFS implementation */
#define UBIFS_VERSION 1

/* UBIFS file system VFS magic number */
#define UBIFS_SUPER_MAGIC 0x24051905

/*
 * Number of UBIFS blocks per VFS page, and the same quantity expressed as a
 * shift. Both are derived from the page and UBIFS block geometry constants.
 */
#define UBIFS_BLOCKS_PER_PAGE (PAGE_SIZE / UBIFS_BLOCK_SIZE)
#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_SHIFT - UBIFS_BLOCK_SHIFT)
/* "File system end of life" sequence number watermark */
#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL
/*
* Minimum amount of LEBs reserved for the index. At present the index needs at
* least 2 LEBs: one for the index head and one for in-the-gaps method (which
* currently does not cater for the index head and so excludes it from
* consideration).
*/
#define MIN_INDEX_LEBS 2
/* Minimum amount of data UBIFS writes to the flash */
#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
/*
* Currently we do not support inode number overlapping and re-using, so this
* watermark defines dangerous inode number level. This should be fixed later,
* although it is difficult to exceed current limit. Another option is to use
* 64-bit inode numbers, but this means more overhead.
*/
#define INUM_WARN_WATERMARK 0xFFF00000
#define INUM_WATERMARK 0xFFFFFF00
/* Maximum number of entries in each LPT (LEB category) heap */
#define LPT_HEAP_SZ 256

/*
 * Background thread name pattern. The numbers are UBI device and volume
 * numbers.
 */
#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"

/* Maximum possible inode number (only 32-bit inodes are supported now) */
#define MAX_INUM 0xFFFFFFFF

/* Number of non-data journal heads */
#define NONDATA_JHEADS_CNT 2

/* Shorter names for journal head numbers for internal usage */
#define GCHD UBIFS_GC_HEAD
#define BASEHD UBIFS_BASE_HEAD
#define DATAHD UBIFS_DATA_HEAD

/*
 * 'No change' value for 'ubifs_change_lp()'.
 *
 * NOTE(review): with a 32-bit 'int' this hexadecimal literal does not fit in
 * 'int' and therefore has type 'unsigned int' - confirm that comparisons
 * against 'int' arguments behave as intended.
 */
#define LPROPS_NC 0x80000001
/*
 * There is no notion of truncation key because truncation nodes do not exist
 * in TNC. However, when replaying, it is handy to introduce fake "truncation"
 * keys for truncation nodes because the code becomes simpler. So we define
 * %UBIFS_TRUN_KEY type.
 *
 * But otherwise, out of the journal replay scope, the truncation keys are
 * invalid. This is why %UBIFS_TRUN_KEY and %UBIFS_INVALID_KEY expand to the
 * same value.
 */
#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT

/*
 * How much a directory entry/extended attribute entry adds to the parent/host
 * inode: the directory entry node size plus the name and one extra byte,
 * rounded up to a multiple of 8.
 */
#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8)

/*
 * How much an extended attribute adds to the host inode: the inode node size
 * plus the data and one extra byte, rounded up to a multiple of 8.
 */
#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8)
/*
 * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered
 * "old", and znodes which were touched last 'YOUNG_ZNODE_AGE' seconds ago are
 * considered "young". This is used by shrinker when selecting znode to trim
 * off.
 */
#define OLD_ZNODE_AGE 20
#define YOUNG_ZNODE_AGE 5

/*
 * Some compressors, like LZO, may end up with more data than the input buffer.
 * So UBIFS always allocates larger output buffer, to be sure the compressor
 * will not corrupt memory in case of worst case compression.
 */
#define WORST_COMPR_FACTOR 2

/*
 * Cipher block size: %FS_CRYPTO_BLOCK_SIZE when file-system encryption support
 * is compiled in, zero otherwise.
 */
#ifdef CONFIG_FS_ENCRYPTION
#define UBIFS_CIPHER_BLOCK_SIZE FS_CRYPTO_BLOCK_SIZE
#else
#define UBIFS_CIPHER_BLOCK_SIZE 0
#endif

/*
 * How much memory is needed for a buffer where we compress a data node: the
 * data node header plus %WORST_COMPR_FACTOR times the UBIFS block size.
 */
#define COMPRESSED_DATA_NODE_BUF_SZ \
	(UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)

/* Maximum expected tree height for use by bottom_up_buf */
#define BOTTOM_UP_HEIGHT 64

/* Maximum number of data nodes to bulk-read */
#define UBIFS_MAX_BULK_READ 32
/*
 * Sizes of the hash and HMAC arrays: the maximum hash/HMAC lengths when
 * authentication support is compiled in, zero otherwise.
 */
#ifdef CONFIG_UBIFS_FS_AUTHENTICATION
#define UBIFS_HASH_ARR_SZ UBIFS_MAX_HASH_LEN
#define UBIFS_HMAC_ARR_SZ UBIFS_MAX_HMAC_LEN
#else
#define UBIFS_HASH_ARR_SZ 0
#define UBIFS_HMAC_ARR_SZ 0
#endif

/*
 * The UBIFS sysfs directory name pattern and maximum name length (3 for "ubi"
 * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero
 * byte).
 *
 * NOTE(review): the length budgets two characters for each of the two "%d"
 * numbers - confirm how users of this length cope with larger numbers.
 */
#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
/* Lockdep classes for UBIFS inode @ui_mutex, numbered consecutively from 0. */
enum {
	WB_MUTEX_1,	/* 0 */
	WB_MUTEX_2,	/* 1 */
	WB_MUTEX_3,	/* 2 */
	WB_MUTEX_4,	/* 3 */
};
/* Znode flags. These are bit numbers which store the flags, not bit masks. */
enum {
	/* znode is dirty */
	DIRTY_ZNODE,
	/*
	 * znode is being committed and a new instance of this znode has to be
	 * created before changing this znode
	 */
	COW_ZNODE,
	/*
	 * znode is obsolete, which means it was deleted, but it is still in
	 * the commit list and the ongoing commit operation will commit it,
	 * and delete this znode after it is done
	 */
	OBSOLETE_ZNODE,
};
/* Commit states. */
enum {
	COMMIT_RESTING            = 0,	/* commit is not wanted */
	COMMIT_BACKGROUND         = 1,	/* background commit has been requested */
	COMMIT_REQUIRED           = 2,	/* commit is required */
	COMMIT_RUNNING_BACKGROUND = 3,	/* background commit is running */
	COMMIT_RUNNING_REQUIRED   = 4,	/* commit is running and it is required */
	COMMIT_BROKEN             = 5,	/* commit failed */
};
/*
 * 'ubifs_scan_a_node()' return values.
 *
 * Zero and negative values are the codes below; a value greater than zero
 * means 'scanned that number of padding bytes'.
 */
enum {
	SCANNED_GARBAGE        =  0,	/* scanned garbage */
	SCANNED_EMPTY_SPACE    = -1,	/* scanned empty space */
	SCANNED_A_NODE         = -2,	/* scanned a valid node */
	SCANNED_A_CORRUPT_NODE = -3,	/* scanned a corrupted node */
	SCANNED_A_BAD_PAD_NODE = -4,	/* padding node with invalid pad length */
};
/* LPT cnode flag bits. */
enum {
	/* cnode is dirty */
	DIRTY_CNODE,
	/*
	 * cnode is being committed and has been copied (or deleted), so it
	 * can (and must) be freed when the commit is finished
	 */
	OBSOLETE_CNODE,
	/* cnode is being committed and must be copied before writing */
	COW_CNODE,
};
/* Dirty flag bits (lpt_drty_flgs) for LPT special nodes. */
enum {
	LTAB_DIRTY  = 0x1,	/* ltab node is dirty */
	LSAVE_DIRTY = 0x2,	/* lsave node is dirty */
};
/* Return codes used by the garbage collector. */
enum {
	/* the logical eraseblock was freed and is ready to use */
	LEB_FREED     = 0,
	/* indexing LEB was freed and can be used only after the commit */
	LEB_FREED_IDX = 1,
	/* the logical eraseblock was freed and retained for GC purposes */
	LEB_RETAINED  = 2,
};
/* Action taken upon a failed ubifs_assert(). */
enum {
	ASSACT_REPORT,	/* just report the failed assertion */
	ASSACT_RO,	/* switch to read-only mode */
	ASSACT_PANIC,	/* call BUG() and possibly panic the kernel */
};
/**
 * struct ubifs_old_idx - index node obsoleted since last commit start.
 * @rb: rb-tree node
 * @lnum: LEB number of obsoleted index node
 * @offs: offset of obsoleted index node
 *
 * @lnum and @offs together give the position of the obsoleted index node.
 */
struct ubifs_old_idx {
	struct rb_node rb;
	int lnum;
	int offs;
};
/*
 * The below union makes it easier to deal with keys. All members overlay the
 * same %UBIFS_SK_LEN bytes:
 *
 * @u8: the key as bytes
 * @u32: the key as 32-bit words
 * @u64: the key as 64-bit words
 * @j32: the key as 32-bit words of type __le32
 */
union ubifs_key {
	uint8_t u8[UBIFS_SK_LEN];
	uint32_t u32[UBIFS_SK_LEN/4];
	uint64_t u64[UBIFS_SK_LEN/8];
	__le32 j32[UBIFS_SK_LEN/4];
};
/**
 * struct ubifs_scan_node - UBIFS scanned node information.
 * @list: list of scanned nodes
 * @key: key of node scanned (if it has one)
 * @sqnum: sequence number
 * @type: type of node scanned
 * @offs: offset within LEB of node scanned
 * @len: length of node scanned
 * @node: raw node
 */
struct ubifs_scan_node {
	struct list_head list;
	union ubifs_key key;
	unsigned long long sqnum;
	int type;
	int offs;
	int len;
	void *node;
};
/**
 * struct ubifs_scan_leb - UBIFS scanned LEB information.
 * @lnum: logical eraseblock number
 * @nodes_cnt: number of nodes scanned
 * @nodes: list of scanned nodes (struct ubifs_scan_node objects, presumably
 *         linked through their @list member - confirm)
 * @endpt: end point (and therefore the start of empty space)
 * @buf: buffer containing entire LEB scanned
 */
struct ubifs_scan_leb {
	int lnum;
	int nodes_cnt;
	struct list_head nodes;
	int endpt;
	void *buf;
};
/**
 * struct ubifs_gced_idx_leb - garbage-collected indexing LEB.
 * @list: list
 * @lnum: LEB number
 * @unmap: OK to unmap this LEB
 *
 * This data structure is used to temporarily store garbage-collected indexing
 * LEBs - they are not released immediately, but only after the next commit.
 * This is needed to guarantee recoverability.
 */
struct ubifs_gced_idx_leb {
	struct list_head list;
	int lnum;
	int unmap;
};
/**
 * struct ubifs_inode - UBIFS in-memory inode description.
 * @vfs_inode: VFS inode description object
 * @creat_sqnum: sequence number at time of creation
 * @del_cmtno: commit number corresponding to the time the inode was deleted,
 *             protected by @c->commit_sem;
 * @xattr_size: summarized size of all extended attributes in bytes
 * @xattr_cnt: count of extended attributes this inode has
 * @xattr_names: sum of lengths of all extended attribute names belonging to
 *               this inode
 * @dirty: non-zero if the inode is dirty
 * @xattr: non-zero if this is an extended attribute inode
 * @bulk_read: non-zero if bulk-read should be used
 * @compr_type: default compression type used for this inode (2-bit field)
 * @ui_mutex: serializes inode write-back with the rest of VFS operations,
 *            serializes "clean <-> dirty" state changes, serializes bulk-read,
 *            protects @dirty, @bulk_read, @ui_size, and @xattr_size
 * @xattr_sem: serializes write operations (remove|set|create) on xattr
 * @ui_lock: protects @synced_i_size
 * @synced_i_size: synchronized size of inode, i.e. the value of inode size
 *                 currently stored on the flash; used only for regular file
 *                 inodes
 * @ui_size: inode size used by UBIFS when writing to flash
 * @flags: inode flags (@UBIFS_COMPR_FL, etc)
 * @last_page_read: page number of last page read (for bulk read)
 * @read_in_a_row: number of consecutive pages read in a row (for bulk read)
 * @data_len: length of the data attached to the inode
 * @data: inode's data
 *
 * @ui_mutex exists for two main reasons. First, it prevents inodes from being
 * written back while UBIFS is changing them in the middle of a VFS operation.
 * This way UBIFS makes sure the inode fields are consistent. For example, in
 * 'ubifs_rename()' we change 4 inodes simultaneously, and write-back must not
 * write any of them before we have finished.
 *
 * The second reason is budgeting - UBIFS has to budget all operations. If an
 * operation is going to mark an inode dirty, it has to allocate budget for
 * this. It cannot just mark it dirty because there is no guarantee there will
 * be enough flash space to write the inode back later. This means UBIFS has
 * to have full control over inode "clean <-> dirty" transitions (and pages
 * actually). But unfortunately, VFS marks inodes dirty in many places, and it
 * does not ask the file-system if it is allowed to do so (there is a notifier,
 * but it is not enough), i.e., there is no mechanism to synchronize with this.
 * So UBIFS has its own inode dirty flag and its own mutex to serialize
 * "clean <-> dirty" transitions.
 *
 * The @synced_i_size field is used to make sure we never write pages which are
 * beyond last synchronized inode size. See 'ubifs_writepage()' for more
 * information.
 *
 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
 * make sure @inode->i_size is always changed under @ui_mutex, because it
 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
 * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
 * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
 * could consider to rework locking and base it on "shadow" fields.
 */
struct ubifs_inode {
	struct inode vfs_inode;
	unsigned long long creat_sqnum;
	unsigned long long del_cmtno;
	unsigned int xattr_size;
	unsigned int xattr_cnt;
	unsigned int xattr_names;
	unsigned int dirty:1;
	unsigned int xattr:1;
	unsigned int bulk_read:1;
	unsigned int compr_type:2;
	struct mutex ui_mutex;
	struct rw_semaphore xattr_sem;
	spinlock_t ui_lock;
	loff_t synced_i_size;
	loff_t ui_size;
	int flags;
	pgoff_t last_page_read;
	pgoff_t read_in_a_row;
	int data_len;
	void *data;
};
/**
 * struct ubifs_unclean_leb - records a LEB recovered under read-only mode.
 * @list: list
 * @lnum: LEB number of recovered LEB
 * @endpt: offset where recovery ended
 *
 * This structure records a LEB identified during recovery that needs to be
 * cleaned but was not cleaned because UBIFS was mounted read-only. The
 * information is used to clean the LEB when remounting to read-write mode.
 */
struct ubifs_unclean_leb {
	struct list_head list;
	int lnum;
	int endpt;
};
/*
 * LEB properties flags: a LEB category kept in the bits covered by
 * %LPROPS_CAT_MASK, plus the separate %LPROPS_TAKEN and %LPROPS_INDEX bits.
 */
enum {
	/* not categorized */
	LPROPS_UNCAT     = 0,
	/* dirty > free, dirty >= @c->dead_wm, not index */
	LPROPS_DIRTY     = 1,
	/* dirty + free > @c->min_idx_node_sze and index */
	LPROPS_DIRTY_IDX = 2,
	/* free > 0, dirty < @c->dead_wm, not empty, not index */
	LPROPS_FREE      = 3,
	/* number of heaps used for storing categorized LEBs */
	LPROPS_HEAP_CNT  = 3,
	/* LEB is empty, not taken */
	LPROPS_EMPTY     = 4,
	/* free + dirty == leb_size, not index, not taken */
	LPROPS_FREEABLE  = 5,
	/* free + dirty == leb_size and index, may be taken */
	LPROPS_FRDI_IDX  = 6,

	/* mask for the LEB categories above */
	LPROPS_CAT_MASK  = 0xF,

	/* LEB was taken (this flag is not saved on the media) */
	LPROPS_TAKEN     = 0x10,
	/* LEB contains indexing nodes (this flag also exists on flash) */
	LPROPS_INDEX     = 0x20,
};
/**
 * struct ubifs_lprops - logical eraseblock properties.
 * @free: amount of free space in bytes
 * @dirty: amount of dirty space in bytes
 * @flags: LEB properties flags (see above)
 * @lnum: LEB number
 * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE)
 * @hpos: heap position in heap of same-category lprops (other categories)
 *
 * @list and @hpos share storage in an anonymous union, so only the one that
 * matches the LEB's current category is meaningful.
 */
struct ubifs_lprops {
	int free;
	int dirty;
	int flags;
	int lnum;
	union {
		struct list_head list;
		int hpos;
	};
};
/**
 * struct ubifs_lpt_lprops - LPT logical eraseblock properties.
 * @free: amount of free space in bytes
 * @dirty: amount of dirty space in bytes
 * @tgc: trivial GC flag (1 => unmap after commit end)
 * @cmt: commit flag (1 => reserved for commit)
 *
 * @tgc and @cmt are single-bit fields, so they can only hold 0 or 1.
 */
struct ubifs_lpt_lprops {
	int free;
	int dirty;
	/* spelled 'unsigned int' like the other bit-fields in this header */
	unsigned int tgc:1;
	unsigned int cmt:1;
};
/**
 * struct ubifs_lp_stats - statistics of eraseblocks in the main area.
 * @empty_lebs: number of empty LEBs (includes @taken_empty_lebs)
 * @taken_empty_lebs: number of empty LEBs which were taken but not yet
 *                    written to
 * @idx_lebs: number of indexing LEBs
 * @total_free: total free space in bytes (includes all LEBs)
 * @total_dirty: total dirty space in bytes (includes all LEBs)
 * @total_used: total used space in bytes (does not include index LEBs)
 * @total_dead: total dead space in bytes (does not include index LEBs)
 * @total_dark: total dark space in bytes (does not include index LEBs)
 *
 * The @taken_empty_lebs field counts the LEBs that are in the transient state
 * of having been "taken" for use but not yet written to. @taken_empty_lebs is
 * needed to account correctly for @gc_lnum, otherwise @empty_lebs could be
 * used by itself (in which case 'unused_lebs' would be a better name). In the
 * case of @gc_lnum, it is "taken" at mount time or whenever a LEB is retained
 * by GC, but unlike other empty LEBs that are "taken", it may not be written
 * straight away (i.e. before the next commit start or unmount), so either
 * @gc_lnum must be specially accounted for, or the current approach followed
 * i.e. count it under @taken_empty_lebs.
 *
 * @empty_lebs includes @taken_empty_lebs.
 *
 * @total_used, @total_dead and @total_dark fields do not account indexing
 * LEBs.
 */
struct ubifs_lp_stats {
	int empty_lebs;
	int taken_empty_lebs;
	int idx_lebs;
	long long total_free;
	long long total_dirty;
	long long total_used;
	long long total_dead;
	long long total_dark;
};
/* Forward declaration: cnodes, pnodes and nbranches point to nnodes */
struct ubifs_nnode;

/**
 * struct ubifs_cnode - LEB Properties Tree common node.
 * @parent: parent nnode
 * @cnext: next cnode to commit
 * @flags: flags (LPT cnode flag bits: %DIRTY_CNODE, %OBSOLETE_CNODE,
 *         %COW_CNODE)
 * @iip: index in parent
 * @level: level in the tree (zero for pnodes, greater than zero for nnodes)
 * @num: node number
 *
 * struct ubifs_pnode and struct ubifs_nnode begin with these same members in
 * the same order.
 */
struct ubifs_cnode {
	struct ubifs_nnode *parent;
	struct ubifs_cnode *cnext;
	unsigned long flags;
	int iip;
	int level;
	int num;
};
/**
 * struct ubifs_pnode - LEB Properties Tree leaf node.
 * @parent: parent nnode
 * @cnext: next cnode to commit
 * @flags: flags (LPT cnode flag bits: %DIRTY_CNODE, %OBSOLETE_CNODE,
 *         %COW_CNODE)
 * @iip: index in parent
 * @level: level in the tree (always zero for pnodes)
 * @num: node number
 * @lprops: LEB properties array (%UBIFS_LPT_FANOUT entries)
 */
struct ubifs_pnode {
	struct ubifs_nnode *parent;
	struct ubifs_cnode *cnext;
	unsigned long flags;
	int iip;
	int level;
	int num;
	struct ubifs_lprops lprops[UBIFS_LPT_FANOUT];
};
/**
 * struct ubifs_nbranch - LEB Properties Tree internal node branch.
 * @lnum: LEB number of child
 * @offs: offset of child
 * @nnode: nnode child
 * @pnode: pnode child
 * @cnode: cnode child
 *
 * @nnode, @pnode and @cnode share storage in an anonymous union: they are
 * differently typed views of the one child pointer.
 */
struct ubifs_nbranch {
	int lnum;
	int offs;
	union {
		struct ubifs_nnode *nnode;
		struct ubifs_pnode *pnode;
		struct ubifs_cnode *cnode;
	};
};
/**
 * struct ubifs_nnode - LEB Properties Tree internal node.
 * @parent: parent nnode
 * @cnext: next cnode to commit
 * @flags: flags (LPT cnode flag bits: %DIRTY_CNODE, %OBSOLETE_CNODE,
 *         %COW_CNODE)
 * @iip: index in parent
 * @level: level in the tree (always greater than zero for nnodes)
 * @num: node number
 * @nbranch: branches to child nodes (%UBIFS_LPT_FANOUT entries)
 */
struct ubifs_nnode {
	struct ubifs_nnode *parent;
	struct ubifs_cnode *cnext;
	unsigned long flags;
	int iip;
	int level;
	int num;
	struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT];
};
/**
 * struct ubifs_lpt_heap - heap of categorized lprops.
 * @arr: heap array (of pointers to struct ubifs_lprops)
 * @cnt: number in heap
 * @max_cnt: maximum number allowed in heap
 *
 * There are %LPROPS_HEAP_CNT heaps.
 */
struct ubifs_lpt_heap {
	struct ubifs_lprops **arr;
	int cnt;
	int max_cnt;
};
/* Return codes for LPT scan callback function. */
enum {
	/* continue scanning */
	LPT_SCAN_CONTINUE,
	/* add the LEB properties scanned to the tree in memory */
	LPT_SCAN_ADD,
	/* stop scanning */
	LPT_SCAN_STOP,
};
/* Forward declaration: only pointers to the file-system object are used here */
struct ubifs_info;

/*
 * Callback used by the 'ubifs_lpt_scan_nolock()' function.
 *
 * It receives the file-system description object @c, the scanned LEB
 * properties @lprops (read-only), the @in_tree indicator and the caller's
 * opaque @data pointer.
 *
 * NOTE(review): presumably @in_tree is non-zero when @lprops is already in the
 * in-memory tree, and the return value is built from the %LPT_SCAN_* codes -
 * confirm against 'ubifs_lpt_scan_nolock()'.
 */
typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
				       const struct ubifs_lprops *lprops,
				       int in_tree, void *data);
/**
 * struct ubifs_wbuf - UBIFS write-buffer.
 * @c: UBIFS file-system description object
 * @buf: write-buffer (of min. flash I/O unit size)
 * @lnum: logical eraseblock number the write-buffer points to
 * @offs: write-buffer offset in this logical eraseblock
 * @avail: number of bytes available in the write-buffer
 * @used: number of used bytes in the write-buffer
 * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
 *         up by 'mutex_lock_nested()').
 * @sync_callback: write-buffer synchronization callback
 * @io_mutex: serializes write-buffer I/O
 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
 *        fields
 * @timer: write-buffer timer
 * @no_timer: non-zero if this write-buffer does not have a timer
 * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing
 * @next_ino: points to the next position of the following inode number
 * @inodes: stores the inode numbers of the nodes which are in wbuf
 *
 * The write-buffer synchronization callback is called when the write-buffer is
 * synchronized in order to notify how much space was wasted due to
 * write-buffer padding and how much free space is left in the LEB.
 *
 * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under
 * spin-lock or mutex because they are written under both mutex and spin-lock.
 * @buf is appended to under mutex but overwritten under both mutex and
 * spin-lock. Thus the data between @buf and @buf + @used can be read under
 * spinlock.
 */
struct ubifs_wbuf {
	struct ubifs_info *c;
	void *buf;
	int lnum;
	int offs;
	int avail;
	int used;
	int size;
	int jhead;
	int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
	struct mutex io_mutex;
	spinlock_t lock;
	struct hrtimer timer;
	unsigned int no_timer:1;
	unsigned int need_sync:1;
	int next_ino;
	ino_t *inodes;
};
/**
 * struct ubifs_bud - bud logical eraseblock.
 * @lnum: logical eraseblock number
 * @start: where the (uncommitted) bud data starts
 * @jhead: journal head number this bud belongs to
 * @list: link in the list buds belonging to the same journal head
 * @rb: link in the tree of all buds
 * @log_hash: the log hash from the commit start node up to this bud
 */
struct ubifs_bud {
	int lnum;
	int start;
	int jhead;
	struct list_head list;
	struct rb_node rb;
	struct shash_desc *log_hash;
};
/**
* struct ubifs_jhead - journal head.
* @wbuf: head's write-buffer
* @buds_list: list of bud LEBs belonging to this journal head
* @grouped: non-zero if UBIFS groups nodes when writing to this journal head
ubifs: Add authentication nodes to journal Nodes that are written to flash can only be authenticated through the index after the next commit. When a journal replay is necessary the nodes are not yet referenced by the index and thus can't be authenticated. This patch overcomes this situation by creating a hash over all nodes beginning from the commit start node over the reference node(s) and the buds themselves. From time to time we insert authentication nodes. Authentication nodes contain a HMAC from the current hash state, so that they can be used to authenticate a journal replay up to the point where the authentication node is. The hash is continued afterwards so that theoretically we would only have to check the HMAC of the last authentication node we find. Overall we get this picture: ,,,,,,,, ,......,........................................... ,. CS , hash1.----. hash2.----. ,. | , . |hmac . |hmac ,. v , . v . v ,.REF#0,-> bud -> bud -> bud.-> auth -> bud -> bud.-> auth ... ,..|...,........................................... , | , , | ,,,,,,,,,,,,,,, . | hash3,----. , | , |hmac , v , v , REF#1 -> bud -> bud,-> auth ... ,,,|,,,,,,,,,,,,,,,,,, v REF#2 -> ... | V ... Note how hash3 covers CS, REF#0 and REF#1 so that it is not possible to exchange or skip any reference nodes. Unlike the picture suggests the auth nodes themselves are not hashed. With this it is possible for an offline attacker to cut each journal head or to drop the last reference node(s), but not to skip any journal heads or to reorder any operations. Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de> Signed-off-by: Richard Weinberger <richard@nod.at>
2018-09-07 14:36:36 +02:00
* @log_hash: the log hash from the commit start node up to this journal head
*
* Note, the @buds list is protected by the @c->buds_lock.
*/
struct ubifs_jhead {
	struct ubifs_wbuf wbuf;		/* write-buffer of this journal head */
	struct list_head buds_list;	/* bud LEBs belonging to this head */
	unsigned int grouped:1;		/* non-zero if nodes are grouped on write */
	struct shash_desc *log_hash;	/* log hash from commit start up to this head */
};
/**
 * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
 * @key: key
 * @znode: znode address in memory
 * @leaf: generic pointer sharing storage with @znode (anonymous union), so
 *        only one of the two is meaningful at a time; presumably points to
 *        the in-memory target node when the branch does not refer to a znode
 *        (TODO: confirm against the TNC code)
 * @lnum: LEB number of the target node (indexing node or data node)
 * @offs: target node offset within @lnum
 * @len: target node length
 * @hash: the hash of the target node
 */
struct ubifs_zbranch {
	union ubifs_key key;
	/* @znode and @leaf alias the same pointer-sized slot */
	union {
		struct ubifs_znode *znode;
		void *leaf;
	};
	int lnum;
	int offs;
	int len;
	u8 hash[UBIFS_HASH_ARR_SZ];
};
/**
 * struct ubifs_znode - in-memory representation of an indexing node.
 * @parent: parent znode or NULL if it is the root
 * @cnext: next znode to commit
 * @cparent: parent node for this commit
 * @ciip: index in cparent's zbranch array
 * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE)
 * @time: last access time (seconds)
 * @level: level of the entry in the TNC tree
 * @child_cnt: count of child znodes
 * @iip: index in parent's zbranch array
 * @alt: lower bound of key range has altered i.e. child inserted at slot 0
 * @lnum: LEB number of the corresponding indexing node
 * @offs: offset of the corresponding indexing node
 * @len: length of the corresponding indexing node
 * @zbranch: array of znode branches (@c->fanout elements)
 *
 * Note! The @lnum, @offs, and @len fields are not really needed - we have them
 * only for internal consistency check. They could be removed to save some RAM.
 *
 * @zbranch is a flexible array member, so sizeof(struct ubifs_znode) does not
 * include any branches and it has to stay the last member of the structure.
 */
struct ubifs_znode {
	struct ubifs_znode *parent;
	struct ubifs_znode *cnext;
	struct ubifs_znode *cparent;
	int ciip;
	unsigned long flags;
	time64_t time;
	int level;
	int child_cnt;
	int iip;
	int alt;
	int lnum;
	int offs;
	int len;
	struct ubifs_zbranch zbranch[];	/* must remain the last member */
};
/**
 * struct bu_info - bulk-read information.
 * @key: first data node key
 * @zbranch: zbranches of data nodes to bulk read (at most
 *           %UBIFS_MAX_BULK_READ entries)
 * @buf: buffer to read into
 * @buf_len: buffer length
 * @gc_seq: GC sequence number to detect races with GC
 * @cnt: number of data nodes for bulk read
 * @blk_cnt: number of data blocks including holes
 * @eof: end of file reached
 */
struct bu_info {
	union ubifs_key key;
	struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ];
	void *buf;
	int buf_len;
	int gc_seq;
	int cnt;
	int blk_cnt;
	int eof;
};
/**
 * struct ubifs_node_range - node length range description data structure.
 * @len: fixed node length
 * @min_len: minimum possible node length
 * @max_len: maximum possible node length
 *
 * If @max_len is %0, the node has fixed length @len.
 *
 * @len and @min_len are members of one anonymous union, i.e. they are two
 * names for the same integer; which name applies depends on @max_len.
 */
struct ubifs_node_range {
	/* Same storage: read as @len when @max_len is 0, as @min_len otherwise */
	union {
		int len;
		int min_len;
	};
	int max_len;
};
/**
 * struct ubifs_compressor - UBIFS compressor description structure.
 * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc)
 * @cc: cryptoapi compressor handle
 * @comp_mutex: mutex used during compression
 * @decomp_mutex: mutex used during decompression
 * @name: compressor name
 * @capi_name: cryptoapi compressor name
 *
 * Note, the two mutexes are referenced by pointer, they are not embedded in
 * this structure.
 */
struct ubifs_compressor {
	int compr_type;
	struct crypto_comp *cc;
	struct mutex *comp_mutex;
	struct mutex *decomp_mutex;
	const char *name;
	const char *capi_name;
};
/**
 * struct ubifs_budget_req - budget requirements of an operation.
 *
 * @fast: non-zero if the budgeting should try to acquire budget quickly and
 *        should not try to call write-back
 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
 *               have to be re-calculated
 * @new_page: non-zero if the operation adds a new page
 * @dirtied_page: non-zero if the operation makes a page dirty
 * @new_dent: non-zero if the operation adds a new directory entry
 * @mod_dent: non-zero if the operation removes or modifies an existing
 *            directory entry
 * @new_ino: non-zero if the operation adds a new inode
 * @new_ino_d: how much data newly created inode contains
 * @dirtied_ino: how many inodes the operation makes dirty
 * @dirtied_ino_d: how much data dirtied inode contains
 * @idx_growth: how much the index will supposedly grow
 * @data_growth: how much new data the operation will supposedly add
 * @dd_growth: how much data that makes other data dirty the operation will
 *             supposedly add
 *
 * @idx_growth, @data_growth and @dd_growth are not used in budget request. The
 * budgeting subsystem caches index and data growth values there to avoid
 * re-calculating them when the budget is released. However, if @idx_growth is
 * %-1, it is calculated by the release function using other fields.
 *
 * An inode may contain 4KiB of data at max., thus the width of @new_ino_d
 * is 13 bits, and the width of @dirtied_ino_d is 15 bits, because up to 4
 * inodes may be made dirty by the re-name operation.
 *
 * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to
 * make sure the amount of inode data which contribute to @new_ino_d and
 * @dirtied_ino_d fields are aligned.
 *
 * The request fields exist in two layouts: compact bit-fields when
 * UBIFS_DEBUG is not defined, and full-width integers when it is defined.
 */
struct ubifs_budget_req {
	unsigned int fast:1;
	unsigned int recalculate:1;
#ifndef UBIFS_DEBUG
	/* Compact layout: widths as explained in the description above */
	unsigned int new_page:1;
	unsigned int dirtied_page:1;
	unsigned int new_dent:1;
	unsigned int mod_dent:1;
	unsigned int new_ino:1;
	unsigned int new_ino_d:13;
	unsigned int dirtied_ino:4;
	unsigned int dirtied_ino_d:15;
#else
	/* Not bit-fields to check for overflows */
	unsigned int new_page;
	unsigned int dirtied_page;
	unsigned int new_dent;
	unsigned int mod_dent;
	unsigned int new_ino;
	unsigned int new_ino_d;
	unsigned int dirtied_ino;
	unsigned int dirtied_ino_d;
#endif
	/* Cached growth values, see the description above */
	int idx_growth;
	int data_growth;
	int dd_growth;
};
/**
 * struct ubifs_orphan - stores the inode number of an orphan.
 * @rb: rb-tree node of rb-tree of orphans sorted by inode number
 * @list: list head of list of orphans in order added
 * @new_list: list head of list of orphans added since the last commit
 * @child_list: list of xattr children if this orphan hosts xattrs, list head
 *              if this orphan is a xattr, not used otherwise.
 * @cnext: next orphan to commit
 * @dnext: next orphan to delete
 * @inum: inode number
 * @new: %1 => added since the last commit, otherwise %0
 * @cmt: %1 => commit pending, otherwise %0
 * @del: %1 => delete pending, otherwise %0
 */
struct ubifs_orphan {
	struct rb_node rb;
	struct list_head list;
	struct list_head new_list;
	struct list_head child_list;
	struct ubifs_orphan *cnext;
	struct ubifs_orphan *dnext;
	ino_t inum;
	/* One-bit state flags */
	unsigned new:1;
	unsigned cmt:1;
	unsigned del:1;
};
/**
 * struct ubifs_mount_opts - UBIFS-specific mount options information.
 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
 * @bulk_read: enable/disable bulk-reads (%0 default, %1 disable, %2 enable)
 * @chk_data_crc: enable/disable CRC data checking when reading data nodes
 *                (%0 default, %1 disable, %2 enable)
 * @override_compr: override default compressor (%0 - do not override and use
 *                  superblock compressor, %1 - override and use compressor
 *                  specified in @compr_type)
 * @compr_type: compressor type to override the superblock compressor with
 *              (%UBIFS_COMPR_NONE, etc)
 *
 * All members are narrow bit-fields: the three-state options take 2 bits,
 * @override_compr takes 1 bit and @compr_type takes 2 bits.
 */
struct ubifs_mount_opts {
	unsigned int unmount_mode:2;
	unsigned int bulk_read:2;
	unsigned int chk_data_crc:2;
	unsigned int override_compr:1;
	unsigned int compr_type:2;
};
/**
 * struct ubifs_budg_info - UBIFS budgeting information.
 * @idx_growth: amount of bytes budgeted for index growth
 * @data_growth: amount of bytes budgeted for cached data
 * @dd_growth: amount of bytes budgeted for cached data that will make
 *             other data dirty
 * @uncommitted_idx: amount of bytes that were budgeted for growth of the
 *                   index, but which still have to be taken into account
 *                   because the index has not been committed so far
 * @old_idx_sz: size of index on flash
 * @min_idx_lebs: minimum number of LEBs required for the index
 * @nospace: non-zero if the file-system does not have flash space (used as
 *           optimization)
 * @nospace_rp: the same as @nospace, but additionally means that even reserved
 *              pool is full
 * @page_budget: budget for a page (constant, never changed after mount)
 * @inode_budget: budget for an inode (constant, never changed after mount)
 * @dent_budget: budget for a directory entry (constant, never changed after
 *               mount)
 */
struct ubifs_budg_info {
	long long idx_growth;
	long long data_growth;
	long long dd_growth;
	long long uncommitted_idx;
	unsigned long long old_idx_sz;
	int min_idx_lebs;
	unsigned int nospace:1;
	unsigned int nospace_rp:1;
	/* Per-object budgets, documented above as constant after mount */
	int page_budget;
	int inode_budget;
	int dent_budget;
};
/**
 * struct ubifs_stats_info - per-FS statistics information.
 * @magic_errors: number of bad magic numbers (will be reset with a new mount).
 * @node_errors: number of bad nodes (will be reset with a new mount).
 * @crc_errors: number of bad crcs (will be reset with a new mount).
 */
struct ubifs_stats_info {
	unsigned int magic_errors;
	unsigned int node_errors;
	unsigned int crc_errors;
};
/* Forward declaration: struct ubifs_info below only holds a pointer to it */
struct ubifs_debug_info;
/**
* struct ubifs_info - UBIFS file-system description data structure
* (per-superblock).
* @vfs_sb: VFS @struct super_block object
* @sup_node: The super block node as read from the device
*
* @highest_inum: highest used inode number
* @max_sqnum: current global sequence number
* @cmt_no: commit number of the last successfully completed commit, protected
* by @commit_sem
* @cnt_lock: protects @highest_inum and @max_sqnum counters
* @fmt_version: UBIFS on-flash format version
* @ro_compat_version: R/O compatibility version
* @uuid: UUID from super block
*
* @lhead_lnum: log head logical eraseblock number
* @lhead_offs: log head offset
* @ltail_lnum: log tail logical eraseblock number (offset is always 0)
* @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and
* @bud_bytes
* @min_log_bytes: minimum required number of bytes in the log
* @cmt_bud_bytes: used during commit to temporarily store the amount of bytes
* in committed buds
*
* @buds: tree of all buds indexed by bud LEB number
* @bud_bytes: how many bytes of flash is used by buds
* @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud
* lists
* @jhead_cnt: count of journal heads
* @jheads: journal heads (head zero is base head)
* @max_bud_bytes: maximum number of bytes allowed in buds
* @bg_bud_bytes: number of bud bytes when background commit is initiated
* @old_buds: buds to be released after commit ends
* @max_bud_cnt: maximum number of buds
*
* @commit_sem: synchronizes committer with other processes
* @cmt_state: commit state
* @cs_lock: commit state lock
* @cmt_wq: wait queue to sleep on if the log is full and a commit is running
*
* @big_lpt: flag that LPT is too big to write whole during commit
* @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
* @double_hash: flag indicating that we can do lookups by hash
* @encrypted: flag indicating that this file system contains encrypted files
* @no_chk_data_crc: do not check CRCs when reading data nodes (except during
* recovery)
* @bulk_read: enable bulk-reads
* @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
* @rw_incompat: the media is not R/W compatible
* @assert_action: action to take when a ubifs_assert() fails
* @authenticated: flag indicating the FS is mounted in authenticated mode
*
* @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
* @calc_idx_sz
* @zroot: zbranch which points to the root index node and znode
* @cnext: next znode to commit
* @enext: next znode to commit to empty space
* @gap_lebs: array of LEBs used by the in-gaps commit method
* @cbuf: commit buffer
* @ileb_buf: buffer for commit in-the-gaps method
* @ileb_len: length of data in ileb_buf
* @ihead_lnum: LEB number of index head
* @ihead_offs: offset of index head
* @ilebs: pre-allocated index LEBs
* @ileb_cnt: number of pre-allocated index LEBs
* @ileb_nxt: next pre-allocated index LEBs
* @old_idx: tree of index nodes obsoleted since the last commit start
* @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
*
* @mst_node: master node
* @mst_offs: offset of valid master node
*
* @max_bu_buf_len: maximum bulk-read buffer length
* @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
* @bu: pre-allocated bulk-read information
*
* @write_reserve_mutex: protects @write_reserve_buf
* @write_reserve_buf: on the write path we allocate memory, which might
* sometimes be unavailable, in which case we use this
* write reserve buffer
*
* @log_lebs: number of logical eraseblocks in the log
* @log_bytes: log size in bytes
* @log_last: last LEB of the log
* @lpt_lebs: number of LEBs used for lprops table
* @lpt_first: first LEB of the lprops table area
* @lpt_last: last LEB of the lprops table area
* @orph_lebs: number of LEBs used for the orphan area
* @orph_first: first LEB of the orphan area
* @orph_last: last LEB of the orphan area
* @main_lebs: count of LEBs in the main area
* @main_first: first LEB of the main area
* @main_bytes: main area size in bytes
*
* @key_hash_type: type of the key hash
* @key_hash: direntry key hash function
* @key_fmt: key format
* @key_len: key length
* @hash_len: The length of the index node hashes
* @fanout: fanout of the index tree (number of links per indexing node)
*
* @min_io_size: minimal input/output unit size
* @min_io_shift: number of bits in @min_io_size minus one
* @max_write_size: maximum amount of bytes the underlying flash can write at a
* time (MTD write buffer size)
* @max_write_shift: number of bits in @max_write_size minus one
* @leb_size: logical eraseblock size in bytes
* @leb_start: starting offset of logical eraseblocks within physical
* eraseblocks
* @half_leb_size: half LEB size
* @idx_leb_size: how many bytes of an LEB are effectively available when it is
* used to store indexing nodes (@leb_size - @max_idx_node_sz)
* @leb_cnt: count of logical eraseblocks
* @max_leb_cnt: maximum count of logical eraseblocks
* @ro_media: the underlying UBI volume is read-only
* @ro_mount: the file-system was mounted as read-only
* @ro_error: UBIFS switched to R/O mode because an error happened
*
* @dirty_pg_cnt: number of dirty pages (not used)
* @dirty_zn_cnt: number of dirty znodes
* @clean_zn_cnt: number of clean znodes
*
* @space_lock: protects @bi and @lst
* @lst: lprops statistics
* @bi: budgeting information
* @calc_idx_sz: temporary variable which is used to calculate new index size
* (contains accurate new index size at end of TNC commit start)
*
* @ref_node_alsz: size of the LEB reference node aligned to the min. flash
* I/O unit
* @mst_node_alsz: master node aligned size
* @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
* @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
* @max_inode_sz: maximum possible inode size in bytes
* @max_znode_sz: size of znode in bytes
*
* @leb_overhead: how many bytes are wasted in an LEB when it is filled with
* data nodes of maximum size - used in free space reporting
* @dead_wm: LEB dead space watermark
* @dark_wm: LEB dark space watermark
* @block_cnt: count of 4KiB blocks on the FS
*
* @ranges: UBIFS node length ranges
* @ubi: UBI volume descriptor
* @di: UBI device information
* @vi: UBI volume information
*
* @orph_tree: rb-tree of orphan inode numbers
* @orph_list: list of orphan inode numbers in order added
* @orph_new: list of orphan inode numbers added since last commit
* @orph_cnext: next orphan to commit
* @orph_dnext: next orphan to delete
* @orphan_lock: lock for orph_tree and orph_new
* @orph_buf: buffer for orphan nodes
* @new_orphans: number of orphans since last commit
* @cmt_orphans: number of orphans being committed
* @tot_orphans: number of orphans in the rb_tree
* @max_orphans: maximum number of orphans allowed
* @ohead_lnum: orphan head LEB number
* @ohead_offs: orphan head offset
* @no_orphs: non-zero if there are no orphans
*
* @bgt: UBIFS background thread
* @bgt_name: background thread name
* @need_bgt: if background thread should run
* @need_wbuf_sync: if write-buffers have to be synchronized
*
* @gc_lnum: LEB number used for garbage collection
* @sbuf: a buffer of LEB size used by GC and replay for scanning
* @idx_gc: list of index LEBs that have been garbage collected
* @idx_gc_cnt: number of elements on the idx_gc list
* @gc_seq: incremented for every non-index LEB garbage collected
* @gced_lnum: last non-index LEB that was garbage collected
*
* @infos_list: links all 'ubifs_info' objects
* @umount_mutex: serializes shrinker and un-mount
* @shrinker_run_no: shrinker run number
*
* @space_bits: number of bits needed to record free or dirty space
* @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT
* @lpt_offs_bits: number of bits needed to record an offset in the LPT
* @lpt_spc_bits: number of bits needed to record space in the LPT
* @pcnt_bits: number of bits needed to record pnode or nnode number
* @lnum_bits: number of bits needed to record LEB number
* @nnode_sz: size of on-flash nnode
* @pnode_sz: size of on-flash pnode
* @ltab_sz: size of on-flash LPT lprops table
* @lsave_sz: size of on-flash LPT save table
* @pnode_cnt: number of pnodes
* @nnode_cnt: number of nnodes
* @lpt_hght: height of the LPT
* @pnodes_have: number of pnodes in memory
*
* @lp_mutex: protects lprops table and all the other lprops-related fields
* @lpt_lnum: LEB number of the root nnode of the LPT
* @lpt_offs: offset of the root nnode of the LPT
* @nhead_lnum: LEB number of LPT head
* @nhead_offs: offset of LPT head
* @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
* @dirty_nn_cnt: number of dirty nnodes
* @dirty_pn_cnt: number of dirty pnodes
* @check_lpt_free: flag that indicates LPT GC may be needed
* @lpt_sz: LPT size
* @lpt_nod_buf: buffer for an on-flash nnode or pnode
* @lpt_buf: buffer of LEB size used by LPT
* @nroot: address in memory of the root nnode of the LPT
* @lpt_cnext: next LPT node to commit
* @lpt_heap: array of heaps of categorized lprops
* @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at
* previous commit start
* @uncat_list: list of un-categorized LEBs
* @empty_list: list of empty LEBs
* @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
* @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
* @freeable_cnt: number of freeable LEBs in @freeable_list
* @in_a_category_cnt: count of lprops which are in a certain category, which
* basically means that they were loaded from the flash
*
* @ltab_lnum: LEB number of LPT's own lprops table
* @ltab_offs: offset of LPT's own lprops table
* @ltab: LPT's own lprops table
* @ltab_cmt: LPT's own lprops table (commit copy)
* @lsave_cnt: number of LEB numbers in LPT's save table
* @lsave_lnum: LEB number of LPT's save table
* @lsave_offs: offset of LPT's save table
* @lsave: LPT's save table
* @lscan_lnum: LEB number of last LPT scan
*
* @rp_size: size of the reserved pool in bytes
* @report_rp_size: size of the reserved pool reported to user-space
* @rp_uid: reserved pool user ID
* @rp_gid: reserved pool group ID
*
* @hash_tfm: the hash transformation used for hashing nodes
* @hmac_tfm: the HMAC transformation for this filesystem
* @hmac_desc_len: length of the HMAC used for authentication
* @auth_key_name: the authentication key name
* @auth_hash_name: the name of the hash algorithm used for authentication
* @auth_hash_algo: the authentication hash used for this fs
* @log_hash: the log hash from the commit start node up to the latest reference
* node.
*
* @empty: %1 if the UBI device is empty
* @need_recovery: %1 if the file-system needs recovery
* @replaying: %1 during journal replay
* @mounting: %1 while mounting
* @probing: %1 while attempting to mount if SB_SILENT mount flag is set
* @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
* @replay_list: temporary list used during journal replay
* @replay_buds: list of buds to replay
* @cs_sqnum: sequence number of first node in the log (commit start node)
* @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
* mode
* @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
* FS to R/W mode
* @size_tree: inode size information for recovery
* @mount_opts: UBIFS-specific mount options
*
* @dbg: debugging-related information
* @stats: statistics exported over sysfs
*
* @kobj: kobject for /sys/fs/ubifs/
* @kobj_unregister: completion to unregister sysfs kobject
*/
struct ubifs_info {
struct super_block *vfs_sb;
struct ubifs_sb_node *sup_node;
ino_t highest_inum;
unsigned long long max_sqnum;
unsigned long long cmt_no;
spinlock_t cnt_lock;
int fmt_version;
int ro_compat_version;
unsigned char uuid[16];
int lhead_lnum;
int lhead_offs;
int ltail_lnum;
struct mutex log_mutex;
int min_log_bytes;
long long cmt_bud_bytes;
struct rb_root buds;
long long bud_bytes;
spinlock_t buds_lock;
int jhead_cnt;
struct ubifs_jhead *jheads;
long long max_bud_bytes;
long long bg_bud_bytes;
struct list_head old_buds;
int max_bud_cnt;
struct rw_semaphore commit_sem;
int cmt_state;
spinlock_t cs_lock;
wait_queue_head_t cmt_wq;
struct kobject kobj;
struct completion kobj_unregister;
unsigned int big_lpt:1;
unsigned int space_fixup:1;
unsigned int double_hash:1;
unsigned int encrypted:1;
unsigned int no_chk_data_crc:1;
unsigned int bulk_read:1;
unsigned int default_compr:2;
unsigned int rw_incompat:1;
unsigned int assert_action:2;
unsigned int authenticated:1;
ubifs: support offline signed images HMACs can only be generated on the system the UBIFS image is running on. To support offline signed images we add a PKCS#7 signature to the UBIFS image which can be created by mkfs.ubifs. Both the master node and the superblock need to be authenticated, during normal runtime both are protected with HMACs. For offline signature support however only a single signature is desired. We add a signature covering the superblock node directly behind it. To protect the master node a hash of the master node is added to the superblock which is used when the master node doesn't contain a HMAC. Transition to a read/write filesystem is also supported. During transition first the master node is rewritten with a HMAC (implicitly, it is written anyway as the FS is marked dirty). Afterwards the superblock is rewritten with a HMAC. Once after the image has been mounted read/write it is HMAC only, the signature is no longer required or even present on the filesystem. In an offline signed image the master node is authenticated by the superblock. In a transition to r/w we have to make sure that the master node is rewritten before the superblock node. In this case the master node gets a HMAC and its authenticity no longer depends on the superblock node. There are some cases in which the current code first writes the superblock node though, so with this patch writing of the superblock node is delayed until the master node is written. Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de> Signed-off-by: Richard Weinberger <richard@nod.at>
2019-05-14 10:33:22 +02:00
unsigned int superblock_need_write:1;
struct mutex tnc_mutex;
struct ubifs_zbranch zroot;
struct ubifs_znode *cnext;
struct ubifs_znode *enext;
int *gap_lebs;
void *cbuf;
void *ileb_buf;
int ileb_len;
int ihead_lnum;
int ihead_offs;
int *ilebs;
int ileb_cnt;
int ileb_nxt;
struct rb_root old_idx;
int *bottom_up_buf;
struct ubifs_mst_node *mst_node;
int mst_offs;
int max_bu_buf_len;
struct mutex bu_mutex;
struct bu_info bu;
struct mutex write_reserve_mutex;
void *write_reserve_buf;
int log_lebs;
long long log_bytes;
int log_last;
int lpt_lebs;
int lpt_first;
int lpt_last;
int orph_lebs;
int orph_first;
int orph_last;
int main_lebs;
int main_first;
long long main_bytes;
uint8_t key_hash_type;
uint32_t (*key_hash)(const char *str, int len);
int key_fmt;
int key_len;
int hash_len;
int fanout;
int min_io_size;
int min_io_shift;
int max_write_size;
int max_write_shift;
int leb_size;
int leb_start;
int half_leb_size;
int idx_leb_size;
int leb_cnt;
int max_leb_cnt;
unsigned int ro_media:1;
unsigned int ro_mount:1;
unsigned int ro_error:1;
atomic_long_t dirty_pg_cnt;
atomic_long_t dirty_zn_cnt;
atomic_long_t clean_zn_cnt;
spinlock_t space_lock;
struct ubifs_lp_stats lst;
struct ubifs_budg_info bi;
unsigned long long calc_idx_sz;
int ref_node_alsz;
int mst_node_alsz;
int min_idx_node_sz;
int max_idx_node_sz;
long long max_inode_sz;
int max_znode_sz;
int leb_overhead;
int dead_wm;
int dark_wm;
int block_cnt;
struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT];
struct ubi_volume_desc *ubi;
struct ubi_device_info di;
struct ubi_volume_info vi;
struct rb_root orph_tree;
struct list_head orph_list;
struct list_head orph_new;
struct ubifs_orphan *orph_cnext;
struct ubifs_orphan *orph_dnext;
spinlock_t orphan_lock;
void *orph_buf;
int new_orphans;
int cmt_orphans;
int tot_orphans;
int max_orphans;
int ohead_lnum;
int ohead_offs;
int no_orphs;
struct task_struct *bgt;
char bgt_name[sizeof(BGT_NAME_PATTERN) + 9];
int need_bgt;
int need_wbuf_sync;
int gc_lnum;
void *sbuf;
struct list_head idx_gc;
int idx_gc_cnt;
int gc_seq;
int gced_lnum;
struct list_head infos_list;
struct mutex umount_mutex;
unsigned int shrinker_run_no;
int space_bits;
int lpt_lnum_bits;
int lpt_offs_bits;
int lpt_spc_bits;
int pcnt_bits;
int lnum_bits;
int nnode_sz;
int pnode_sz;
int ltab_sz;
int lsave_sz;
int pnode_cnt;
int nnode_cnt;
int lpt_hght;
int pnodes_have;
struct mutex lp_mutex;
int lpt_lnum;
int lpt_offs;
int nhead_lnum;
int nhead_offs;
int lpt_drty_flgs;
int dirty_nn_cnt;
int dirty_pn_cnt;
int check_lpt_free;
long long lpt_sz;
void *lpt_nod_buf;
void *lpt_buf;
struct ubifs_nnode *nroot;
struct ubifs_cnode *lpt_cnext;
struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT];
struct ubifs_lpt_heap dirty_idx;
struct list_head uncat_list;
struct list_head empty_list;
struct list_head freeable_list;
struct list_head frdi_idx_list;
int freeable_cnt;
int in_a_category_cnt;
int ltab_lnum;
int ltab_offs;
struct ubifs_lpt_lprops *ltab;
struct ubifs_lpt_lprops *ltab_cmt;
int lsave_cnt;
int lsave_lnum;
int lsave_offs;
int *lsave;
int lscan_lnum;
long long rp_size;
long long report_rp_size;
kuid_t rp_uid;
kgid_t rp_gid;
struct crypto_shash *hash_tfm;
struct crypto_shash *hmac_tfm;
int hmac_desc_len;
char *auth_key_name;
char *auth_hash_name;
enum hash_algo auth_hash_algo;
ubifs: Add authentication nodes to journal Nodes that are written to flash can only be authenticated through the index after the next commit. When a journal replay is necessary the nodes are not yet referenced by the index and thus can't be authenticated. This patch overcomes this situation by creating a hash over all nodes beginning from the commit start node over the reference node(s) and the buds themselves. From time to time we insert authentication nodes. Authentication nodes contain a HMAC from the current hash state, so that they can be used to authenticate a journal replay up to the point where the authentication node is. The hash is continued afterwards so that theoretically we would only have to check the HMAC of the last authentication node we find. Overall we get this picture: ,,,,,,,, ,......,........................................... ,. CS , hash1.----. hash2.----. ,. | , . |hmac . |hmac ,. v , . v . v ,.REF#0,-> bud -> bud -> bud.-> auth -> bud -> bud.-> auth ... ,..|...,........................................... , | , , | ,,,,,,,,,,,,,,, . | hash3,----. , | , |hmac , v , v , REF#1 -> bud -> bud,-> auth ... ,,,|,,,,,,,,,,,,,,,,,, v REF#2 -> ... | V ... Note how hash3 covers CS, REF#0 and REF#1 so that it is not possible to exchange or skip any reference nodes. Unlike the picture suggests the auth nodes themselves are not hashed. With this it is possible for an offline attacker to cut each journal head or to drop the last reference node(s), but not to skip any journal heads or to reorder any operations. Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de> Signed-off-by: Richard Weinberger <richard@nod.at>
2018-09-07 14:36:36 +02:00
struct shash_desc *log_hash;
/* The below fields are used only during mounting and re-mounting */
unsigned int empty:1;
unsigned int need_recovery:1;
unsigned int replaying:1;
unsigned int mounting:1;
unsigned int remounting_rw:1;
unsigned int probing:1;
struct list_head replay_list;
struct list_head replay_buds;
unsigned long long cs_sqnum;
struct list_head unclean_leb_list;
struct ubifs_mst_node *rcvrd_mst_node;
struct rb_root size_tree;
struct ubifs_mount_opts mount_opts;
struct ubifs_debug_info *dbg;
struct ubifs_stats_info *stats;
};
/*
 * Objects shared between the UBIFS source files. Everything here is an
 * 'extern' declaration only; the definitions are not part of this header.
 */
extern struct list_head ubifs_infos;
/* NOTE(review): presumably serializes access to 'ubifs_infos' - confirm */
extern spinlock_t ubifs_infos_lock;
extern atomic_long_t ubifs_clean_zn_cnt;
/* Operation tables, one per kind of VFS object they are named after */
extern const struct super_operations ubifs_super_operations;
extern const struct address_space_operations ubifs_file_address_operations;
extern const struct file_operations ubifs_file_operations;
extern const struct inode_operations ubifs_file_inode_operations;
extern const struct file_operations ubifs_dir_operations;
extern const struct inode_operations ubifs_dir_inode_operations;
extern const struct inode_operations ubifs_symlink_inode_operations;
/* Array with UBIFS_COMPR_TYPES_CNT compressor pointers */
extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
extern int ubifs_default_version;
/* auth.c */
/**
 * ubifs_authenticated - check whether authentication is in effect
 * @c: UBIFS file-system description object
 *
 * Returns 1 when CONFIG_UBIFS_FS_AUTHENTICATION is enabled and
 * @c->authenticated is set, 0 otherwise.
 */
static inline int ubifs_authenticated(const struct ubifs_info *c)
{
	/* Compile-time constant: lets the authenticated paths be dropped */
	if (!IS_ENABLED(CONFIG_UBIFS_FS_AUTHENTICATION))
		return 0;

	return c->authenticated ? 1 : 0;
}
struct shash_desc *__ubifs_hash_get_desc(const struct ubifs_info *c);

/**
 * ubifs_hash_get_desc - get a hash descriptor
 * @c: UBIFS file-system description object
 *
 * Returns NULL when the file-system is not authenticated, otherwise whatever
 * __ubifs_hash_get_desc() returns for @c.
 */
static inline struct shash_desc *ubifs_hash_get_desc(const struct ubifs_info *c)
{
	if (!ubifs_authenticated(c))
		return NULL;

	return __ubifs_hash_get_desc(c);
}
/**
 * ubifs_shash_init - initialize a hash descriptor
 * @c: UBIFS file-system description object
 * @desc: descriptor handed to crypto_shash_init()
 *
 * Without authentication this does nothing and returns 0, otherwise the
 * result of crypto_shash_init() is returned.
 */
static inline int ubifs_shash_init(const struct ubifs_info *c,
				   struct shash_desc *desc)
{
	return ubifs_authenticated(c) ? crypto_shash_init(desc) : 0;
}
/**
 * ubifs_shash_update - feed data into a hash descriptor
 * @c: UBIFS file-system description object
 * @desc: descriptor handed to crypto_shash_update()
 * @buf: data to add
 * @len: length of @buf in bytes
 *
 * Without authentication this does nothing and returns 0. Otherwise a
 * negative result of crypto_shash_update() is passed on to the caller and
 * every other result is reported as 0.
 */
static inline int ubifs_shash_update(const struct ubifs_info *c,
				     struct shash_desc *desc, const void *buf,
				     unsigned int len)
{
	int ret;

	if (!ubifs_authenticated(c))
		return 0;

	ret = crypto_shash_update(desc, buf, len);

	/* Only failures are propagated; anything non-negative becomes 0 */
	return ret < 0 ? ret : 0;
}
/**
 * ubifs_shash_final - finish a hash calculation
 * @c: UBIFS file-system description object
 * @desc: descriptor handed to crypto_shash_final()
 * @out: buffer handed to crypto_shash_final() for the result
 *
 * Without authentication this does nothing and returns 0, otherwise the
 * result of crypto_shash_final() is returned.
 */
static inline int ubifs_shash_final(const struct ubifs_info *c,
				    struct shash_desc *desc, u8 *out)
{
	if (!ubifs_authenticated(c))
		return 0;

	return crypto_shash_final(desc, out);
}
int __ubifs_node_calc_hash(const struct ubifs_info *c, const void *buf,
			   u8 *hash);

/**
 * ubifs_node_calc_hash - calculate the hash of a node
 * @c: UBIFS file-system description object
 * @buf: the node, passed on to __ubifs_node_calc_hash()
 * @hash: output buffer, passed on to __ubifs_node_calc_hash()
 *
 * Without authentication this does nothing and returns 0, otherwise the
 * result of __ubifs_node_calc_hash() is returned.
 */
static inline int ubifs_node_calc_hash(const struct ubifs_info *c,
				       const void *buf, u8 *hash)
{
	return ubifs_authenticated(c) ? __ubifs_node_calc_hash(c, buf, hash) : 0;
}
/*
 * Declaration only; the definition is not visible in this header.
 * NOTE(review): presumably fills @node in as an authentication node derived
 * from the hash state @inhash and returns 0 or a negative error code -
 * confirm against the definition.
 */
int ubifs_prepare_auth_node(struct ubifs_info *c, void *node,
struct shash_desc *inhash);
/**
 * ubifs_check_hash - compare two hashes
 * @c: UBIFS file-system description object
 * @expected: first hash
 * @got: second hash
 *
 * Compare the first @c->hash_len bytes of @expected and @got with
 * crypto_memneq(). Returns 0 when they are equal, otherwise the non-zero
 * value reported by crypto_memneq().
 *
 * NOTE(review): this comment used to promise "a negative error code" on
 * mismatch, but the result of crypto_memneq() is passed through untouched
 * and nothing here turns it into an errno. Callers should only test the
 * result for non-zero.
 */
static inline int ubifs_check_hash(const struct ubifs_info *c,
const u8 *expected, const u8 *got)
{
return crypto_memneq(expected, got, c->hash_len);
}
/**
 * ubifs_check_hmac - compare two HMACs
 * @c: UBIFS file-system description object
 * @expected: first HMAC
 * @got: second HMAC
 *
 * Compare the first @c->hmac_desc_len bytes of the HMACs @expected and @got
 * with crypto_memneq(). Returns 0 when they are equal, otherwise the non-zero
 * value reported by crypto_memneq().
 *
 * NOTE(review): this comment used to promise "a negative error code" on
 * mismatch, but the result of crypto_memneq() is passed through untouched
 * and nothing here turns it into an errno. Callers should only test the
 * result for non-zero.
 */
static inline int ubifs_check_hmac(const struct ubifs_info *c,
const u8 *expected, const u8 *got)
{
return crypto_memneq(expected, got, c->hmac_desc_len);
}
/*
 * Declaration only; the definition is not visible in this header.
 * NOTE(review): presumably reports a hash mismatch for @node, found at
 * @lnum:@offs, against @hash - confirm against the definition.
 */
void ubifs_bad_hash(const struct ubifs_info *c, const void *node,
const u8 *hash, int lnum, int offs);
int __ubifs_node_check_hash(const struct ubifs_info *c, const void *buf,
			    const u8 *expected);

/**
 * ubifs_node_check_hash - check the hash of a node
 * @c: UBIFS file-system description object
 * @buf: the node, passed on to __ubifs_node_check_hash()
 * @expected: the expected hash, passed on to __ubifs_node_check_hash()
 *
 * Without authentication this does nothing and returns 0, otherwise the
 * result of __ubifs_node_check_hash() is returned.
 */
static inline int ubifs_node_check_hash(const struct ubifs_info *c,
					const void *buf, const u8 *expected)
{
	return ubifs_authenticated(c) ?
	       __ubifs_node_check_hash(c, buf, expected) : 0;
}
int ubifs_init_authentication(struct ubifs_info *c);
void __ubifs_exit_authentication(struct ubifs_info *c);

/**
 * ubifs_exit_authentication - tear down authentication
 * @c: UBIFS file-system description object
 *
 * Calls __ubifs_exit_authentication() for authenticated file-systems and does
 * nothing otherwise.
 */
static inline void ubifs_exit_authentication(struct ubifs_info *c)
{
	if (!ubifs_authenticated(c))
		return;

	__ubifs_exit_authentication(c);
}
/**
* ubifs_branch_hash - returns a pointer to the hash of a branch
* @c: UBIFS file-system description object
* @br: branch to get the hash from
*
* This returns a pointer to the hash of a branch. Since the key already is a
* dynamically sized object we cannot use a struct member here.
*/
static inline u8 *ubifs_branch_hash(struct ubifs_info *c,
struct ubifs_branch *br)
{
return (void *)br + sizeof(*br) + c->key_len;
}
/**
 * ubifs_copy_hash - copy a hash
 * @c: UBIFS file-system description object
 * @from: source hash
 * @to: destination hash
 *
 * For authenticated file-systems @c->hash_len bytes are copied from @from to
 * @to; otherwise nothing is touched.
 */
static inline void ubifs_copy_hash(const struct ubifs_info *c, const u8 *from,
				   u8 *to)
{
	if (!ubifs_authenticated(c))
		return;

	memcpy(to, from, c->hash_len);
}
int __ubifs_node_insert_hmac(const struct ubifs_info *c, void *buf,
			     int len, int ofs_hmac);

/**
 * ubifs_node_insert_hmac - insert a HMAC into a node
 * @c: UBIFS file-system description object
 * @buf: the node, passed on to __ubifs_node_insert_hmac()
 * @len: passed on to __ubifs_node_insert_hmac()
 * @ofs_hmac: passed on to __ubifs_node_insert_hmac()
 *
 * Without authentication this does nothing and returns 0, otherwise the
 * result of __ubifs_node_insert_hmac() is returned.
 */
static inline int ubifs_node_insert_hmac(const struct ubifs_info *c, void *buf,
					 int len, int ofs_hmac)
{
	return ubifs_authenticated(c) ?
	       __ubifs_node_insert_hmac(c, buf, len, ofs_hmac) : 0;
}
int __ubifs_node_verify_hmac(const struct ubifs_info *c, const void *buf,
			     int len, int ofs_hmac);

/**
 * ubifs_node_verify_hmac - verify the HMAC of a node
 * @c: UBIFS file-system description object
 * @buf: the node, passed on to __ubifs_node_verify_hmac()
 * @len: passed on to __ubifs_node_verify_hmac()
 * @ofs_hmac: passed on to __ubifs_node_verify_hmac()
 *
 * Without authentication this does nothing and returns 0, otherwise the
 * result of __ubifs_node_verify_hmac() is returned.
 */
static inline int ubifs_node_verify_hmac(const struct ubifs_info *c,
					 const void *buf, int len, int ofs_hmac)
{
	return ubifs_authenticated(c) ?
	       __ubifs_node_verify_hmac(c, buf, len, ofs_hmac) : 0;
}
/**
 * ubifs_auth_node_sz - returns the size of an authentication node
 * @c: UBIFS file-system description object
 *
 * This function returns 0 for unauthenticated file-systems. If authentication
 * is enabled it returns the real size of an authentication node, which is
 * sizeof(struct ubifs_auth_node) plus @c->hmac_desc_len.
 */
static inline int ubifs_auth_node_sz(const struct ubifs_info *c)
{
	if (!ubifs_authenticated(c))
		return 0;

	return sizeof(struct ubifs_auth_node) + c->hmac_desc_len;
}
/*
 * ubifs: support offline signed images
 *
 * HMACs can only be generated on the system the UBIFS image is running on.
 * To support offline signed images we add a PKCS#7 signature to the UBIFS
 * image which can be created by mkfs.ubifs.
 *
 * Both the master node and the superblock need to be authenticated, during
 * normal runtime both are protected with HMACs. For offline signature
 * support however only a single signature is desired. We add a signature
 * covering the superblock node directly behind it. To protect the master
 * node a hash of the master node is added to the superblock which is used
 * when the master node doesn't contain a HMAC.
 *
 * Transition to a read/write filesystem is also supported. During
 * transition first the master node is rewritten with a HMAC (implicitly, it
 * is written anyway as the FS is marked dirty). Afterwards the superblock
 * is rewritten with a HMAC. Once after the image has been mounted
 * read/write it is HMAC only, the signature is no longer required or even
 * present on the filesystem.
 *
 * In an offline signed image the master node is authenticated by the
 * superblock. In a transition to r/w we have to make sure that the master
 * node is rewritten before the superblock node. In this case the master
 * node gets a HMAC and its authenticity no longer depends on the superblock
 * node. There are some cases in which the current code first writes the
 * superblock node though, so with this patch writing of the superblock node
 * is delayed until the master node is written.
 *
 * Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
 * Signed-off-by: Richard Weinberger <richard@nod.at>
 *
 * 2019-05-14 10:33:22 +02:00
 */
/*
 * Superblock signature and HMAC helpers. Declarations only; the definitions
 * are not visible in this header.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative error code on failure - confirm against the definitions.
 */
int ubifs_sb_verify_signature(struct ubifs_info *c,
const struct ubifs_sb_node *sup);
/* NOTE(review): presumably true when @hmac consists of zero bytes only - confirm */
bool ubifs_hmac_zero(struct ubifs_info *c, const u8 *hmac);
int ubifs_hmac_wkm(struct ubifs_info *c, u8 *hmac);
int __ubifs_shash_copy_state(const struct ubifs_info *c, struct shash_desc *src,
			     struct shash_desc *target);

/**
 * ubifs_shash_copy_state - copy the state of a hash descriptor
 * @c: UBIFS file-system description object
 * @src: source descriptor, passed on to __ubifs_shash_copy_state()
 * @target: target descriptor, passed on to __ubifs_shash_copy_state()
 *
 * Without authentication this does nothing and returns 0, otherwise the
 * result of __ubifs_shash_copy_state() is returned.
 */
static inline int ubifs_shash_copy_state(const struct ubifs_info *c,
					 struct shash_desc *src,
					 struct shash_desc *target)
{
	return ubifs_authenticated(c) ?
	       __ubifs_shash_copy_state(c, src, target) : 0;
}
/*
 * io.c
 *
 * Declarations only; the definitions are not visible in this header.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative error code on failure - confirm against io.c.
 */
void ubifs_ro_mode(struct ubifs_info *c, int err);
/* LEB helpers: each one names its LEB through @lnum */
int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
int len, int even_ebadmsg);
int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
int len);
int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len);
int ubifs_leb_unmap(struct ubifs_info *c, int lnum);
int ubifs_leb_map(struct ubifs_info *c, int lnum);
int ubifs_is_mapped(const struct ubifs_info *c, int lnum);
/* Helpers working on a write-buffer ('struct ubifs_wbuf') */
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs);
int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf);
/* Node-level helpers: the node lives in @buf / @node */
int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
int lnum, int offs);
int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
int lnum, int offs);
int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
int offs);
int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
int offs, int hmac_offs);
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len,
int lnum, int offs, int quiet, int must_chk_crc);
void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad);
void ubifs_crc_node(struct ubifs_info *c, void *buf, int len);
void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
int ubifs_prepare_node_hmac(struct ubifs_info *c, void *node, int len,
int hmac_offs, int pad);
void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
int ubifs_io_init(struct ubifs_info *c);
void ubifs_pad(const struct ubifs_info *c, void *buf, int pad);
int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf);
int ubifs_bg_wbufs_sync(struct ubifs_info *c);
void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum);
int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode);
/*
 * scan.c
 *
 * Declarations only; the definitions are not visible in this header. The
 * functions either return or take a 'struct ubifs_scan_leb', or work on the
 * LEB position @lnum:@offs.
 * NOTE(review): how the pointer-returning functions report failure (NULL or
 * an ERR_PTR() value) cannot be told from here - confirm against scan.c.
 */
struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
int offs, void *sbuf, int quiet);
void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
int offs, int quiet);
struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
int offs, void *sbuf);
void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
int lnum, int offs);
int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
void *buf, int offs);
void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
void *buf);
/*
 * log.c
 *
 * Declarations only; the definitions are not visible in this header.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative error code on failure - confirm against log.c.
 */
void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud);
void ubifs_create_buds_lists(struct ubifs_info *c);
int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs);
/* Look-ups keyed by LEB number @lnum */
struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum);
struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum);
/* Commit hooks of the log */
int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum);
int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum);
int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum);
int ubifs_consolidate_log(struct ubifs_info *c);
/*
 * journal.c
 *
 * Declarations only; the definitions are not visible in this header. All
 * functions return int and take the inodes they work on as const pointers.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure - confirm against journal.c.
 */
int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
const struct fscrypt_name *nm, const struct inode *inode,
int deletion, int xent);
int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
const union ubifs_key *key, const void *buf, int len);
int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode);
int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode);
/* Takes two directory/inode/name triples, "fst_*" and "snd_*" */
int ubifs_jnl_xrename(struct ubifs_info *c, const struct inode *fst_dir,
const struct inode *fst_inode,
const struct fscrypt_name *fst_nm,
const struct inode *snd_dir,
const struct inode *snd_inode,
const struct fscrypt_name *snd_nm, int sync);
/* Takes an "old_*" and a "new_*" triple plus a @whiteout inode */
int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
const struct inode *old_inode,
const struct fscrypt_name *old_nm,
const struct inode *new_dir,
const struct inode *new_inode,
const struct fscrypt_name *new_nm,
const struct inode *whiteout, int sync);
int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
loff_t old_size, loff_t new_size);
int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
const struct inode *inode, const struct fscrypt_name *nm);
int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1,
const struct inode *inode2);
/*
 * budget.c
 *
 * Declarations only; the definitions are not visible in this header. A
 * budgeting request is described by a 'struct ubifs_budget_req'.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative error code on failure - confirm against budget.c.
 */
int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req);
void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req);
void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
struct ubifs_inode *ui);
/* Variants that additionally take the inode the request belongs to */
int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
struct ubifs_budget_req *req);
UBIFS: improve statfs reporting even more Since free space we report in statfs is file size which should fit to the FS - change the way we calculate free space and use leb_overhead instead of dark_wm in calculations. Results of "freespace" test (120MiB volume, 16KiB LEB size, 512 bytes page size). Before the change: freespace: Test 1: fill the space we have 3 times freespace: was free: 85204992 bytes 81.3 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 11284480 bytes 10.8 MiB, wrote 13.2% more than predicted freespace: was free: 83554304 bytes 79.7 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 12935168 bytes 12.3 MiB, wrote 15.5% more than predicted freespace: was free: 83554304 bytes 79.7 MiB, wrote: 96493568 bytes 92.0 MiB, delta: 12939264 bytes 12.3 MiB, wrote 15.5% more than predicted freespace: Test 1 finished freespace: Test 2: gradually lessen amount of free space and fill the FS freespace: do 10 steps, lessen free space by 7596218 bytes 7.2 MiB each time freespace: was free: 78675968 bytes 75.0 MiB, wrote: 88903680 bytes 84.8 MiB, delta: 10227712 bytes 9.8 MiB, wrote 13.0% more than predicted freespace: was free: 72015872 bytes 68.7 MiB, wrote: 81514496 bytes 77.7 MiB, delta: 9498624 bytes 9.1 MiB, wrote 13.2% more than predicted freespace: was free: 63938560 bytes 61.0 MiB, wrote: 72589312 bytes 69.2 MiB, delta: 8650752 bytes 8.2 MiB, wrote 13.5% more than predicted freespace: was free: 56127488 bytes 53.5 MiB, wrote: 63762432 bytes 60.8 MiB, delta: 7634944 bytes 7.3 MiB, wrote 13.6% more than predicted freespace: was free: 48336896 bytes 46.1 MiB, wrote: 54935552 bytes 52.4 MiB, delta: 6598656 bytes 6.3 MiB, wrote 13.7% more than predicted freespace: was free: 40587264 bytes 38.7 MiB, wrote: 46157824 bytes 44.0 MiB, delta: 5570560 bytes 5.3 MiB, wrote 13.7% more than predicted freespace: was free: 32841728 bytes 31.3 MiB, wrote: 37384192 bytes 35.7 MiB, delta: 4542464 bytes 4.3 MiB, wrote 13.8% more than predicted freespace: was free: 25100288 bytes 23.9 MiB, 
wrote: 28618752 bytes 27.3 MiB, delta: 3518464 bytes 3.4 MiB, wrote 14.0% more than predicted freespace: was free: 17342464 bytes 16.5 MiB, wrote: 19841024 bytes 18.9 MiB, delta: 2498560 bytes 2.4 MiB, wrote 14.4% more than predicted freespace: was free: 9605120 bytes 9.2 MiB, wrote: 11063296 bytes 10.6 MiB, delta: 1458176 bytes 1.4 MiB, wrote 15.2% more than predicted freespace: Test 2 finished freespace: Test 3: gradually lessen amount of free space by trashing and fill the FS freespace: do 10 steps, lessen free space by 7606272 bytes 7.3 MiB each time freespace: trashing: was free: 83668992 bytes 79.8 MiB, need free: 7606272 bytes 7.3 MiB, files created: 248297, delete 225724 (90.9% of them) freespace: was free: 70803456 bytes 67.5 MiB, wrote: 82485248 bytes 78.7 MiB, delta: 11681792 bytes 11.1 MiB, wrote 16.5% more than predicted freespace: trashing: was free: 81080320 bytes 77.3 MiB, need free: 15212544 bytes 14.5 MiB, files created: 248711, delete 202047 (81.2% of them) freespace: was free: 59867136 bytes 57.1 MiB, wrote: 71897088 bytes 68.6 MiB, delta: 12029952 bytes 11.5 MiB, wrote 20.1% more than predicted freespace: trashing: was free: 82243584 bytes 78.4 MiB, need free: 22818816 bytes 21.8 MiB, files created: 248866, delete 179817 (72.3% of them) freespace: was free: 50905088 bytes 48.5 MiB, wrote: 63168512 bytes 60.2 MiB, delta: 12263424 bytes 11.7 MiB, wrote 24.1% more than predicted freespace: trashing: was free: 83402752 bytes 79.5 MiB, need free: 30425088 bytes 29.0 MiB, files created: 248920, delete 158114 (63.5% of them) freespace: was free: 42651648 bytes 40.7 MiB, wrote: 55406592 bytes 52.8 MiB, delta: 12754944 bytes 12.2 MiB, wrote 29.9% more than predicted freespace: trashing: was free: 84402176 bytes 80.5 MiB, need free: 38031360 bytes 36.3 MiB, files created: 248709, delete 136641 (54.9% of them) freespace: was free: 35233792 bytes 33.6 MiB, wrote: 48250880 bytes 46.0 MiB, delta: 13017088 bytes 12.4 MiB, wrote 36.9% more than predicted 
freespace: trashing: was free: 82530304 bytes 78.7 MiB, need free: 45637632 bytes 43.5 MiB, files created: 248778, delete 111208 (44.7% of them) freespace: was free: 27287552 bytes 26.0 MiB, wrote: 40267776 bytes 38.4 MiB, delta: 12980224 bytes 12.4 MiB, wrote 47.6% more than predicted freespace: trashing: was free: 85114880 bytes 81.2 MiB, need free: 53243904 bytes 50.8 MiB, files created: 248508, delete 93052 (37.4% of them) freespace: was free: 22437888 bytes 21.4 MiB, wrote: 35328000 bytes 33.7 MiB, delta: 12890112 bytes 12.3 MiB, wrote 57.4% more than predicted freespace: trashing: was free: 84103168 bytes 80.2 MiB, need free: 60850176 bytes 58.0 MiB, files created: 248637, delete 68743 (27.6% of them) freespace: was free: 15536128 bytes 14.8 MiB, wrote: 28319744 bytes 27.0 MiB, delta: 12783616 bytes 12.2 MiB, wrote 82.3% more than predicted freespace: trashing: was free: 84357120 bytes 80.4 MiB, need free: 68456448 bytes 65.3 MiB, files created: 248567, delete 46852 (18.8% of them) freespace: was free: 9015296 bytes 8.6 MiB, wrote: 22044672 bytes 21.0 MiB, delta: 13029376 bytes 12.4 MiB, wrote 144.5% more than predicted freespace: trashing: was free: 84942848 bytes 81.0 MiB, need free: 76062720 bytes 72.5 MiB, files created: 248636, delete 25993 (10.5% of them) freespace: was free: 6086656 bytes 5.8 MiB, wrote: 8331264 bytes 7.9 MiB, delta: 2244608 bytes 2.1 MiB, wrote 36.9% more than predicted freespace: Test 3 finished freespace: finished successfully After the change: freespace: Test 1: fill the space we have 3 times freespace: was free: 94048256 bytes 89.7 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 2441216 bytes 2.3 MiB, wrote 2.6% more than predicted freespace: was free: 92246016 bytes 88.0 MiB, wrote: 96493568 bytes 92.0 MiB, delta: 4247552 bytes 4.1 MiB, wrote 4.6% more than predicted freespace: was free: 92254208 bytes 88.0 MiB, wrote: 96489472 bytes 92.0 MiB, delta: 4235264 bytes 4.0 MiB, wrote 4.6% more than predicted freespace: Test 1 finished 
freespace: Test 2: gradually lessen amount of free space and fill the FS freespace: do 10 steps, lessen free space by 8386001 bytes 8.0 MiB each time freespace: was free: 86605824 bytes 82.6 MiB, wrote: 88252416 bytes 84.2 MiB, delta: 1646592 bytes 1.6 MiB, wrote 1.9% more than predicted freespace: was free: 78667776 bytes 75.0 MiB, wrote: 80715776 bytes 77.0 MiB, delta: 2048000 bytes 2.0 MiB, wrote 2.6% more than predicted freespace: was free: 69615616 bytes 66.4 MiB, wrote: 71630848 bytes 68.3 MiB, delta: 2015232 bytes 1.9 MiB, wrote 2.9% more than predicted freespace: was free: 61018112 bytes 58.2 MiB, wrote: 62783488 bytes 59.9 MiB, delta: 1765376 bytes 1.7 MiB, wrote 2.9% more than predicted freespace: was free: 52424704 bytes 50.0 MiB, wrote: 53968896 bytes 51.5 MiB, delta: 1544192 bytes 1.5 MiB, wrote 2.9% more than predicted freespace: was free: 43880448 bytes 41.8 MiB, wrote: 45199360 bytes 43.1 MiB, delta: 1318912 bytes 1.3 MiB, wrote 3.0% more than predicted freespace: was free: 35332096 bytes 33.7 MiB, wrote: 36425728 bytes 34.7 MiB, delta: 1093632 bytes 1.0 MiB, wrote 3.1% more than predicted freespace: was free: 26771456 bytes 25.5 MiB, wrote: 27643904 bytes 26.4 MiB, delta: 872448 bytes 852.0 KiB, wrote 3.3% more than predicted freespace: was free: 18231296 bytes 17.4 MiB, wrote: 18878464 bytes 18.0 MiB, delta: 647168 bytes 632.0 KiB, wrote 3.5% more than predicted freespace: was free: 9674752 bytes 9.2 MiB, wrote: 10088448 bytes 9.6 MiB, delta: 413696 bytes 404.0 KiB, wrote 4.3% more than predicted freespace: Test 2 finished freespace: Test 3: gradually lessen amount of free space by trashing and fill the FS freespace: do 10 steps, lessen free space by 8397544 bytes 8.0 MiB each time freespace: trashing: was free: 92372992 bytes 88.1 MiB, need free: 8397552 bytes 8.0 MiB, files created: 248296, delete 225723 (90.9% of them) freespace: was free: 71909376 bytes 68.6 MiB, wrote: 82472960 bytes 78.7 MiB, delta: 10563584 bytes 10.1 MiB, wrote 14.7% more 
than predicted freespace: trashing: was free: 88989696 bytes 84.9 MiB, need free: 16795096 bytes 16.0 MiB, files created: 248794, delete 201838 (81.1% of them) freespace: was free: 60354560 bytes 57.6 MiB, wrote: 71782400 bytes 68.5 MiB, delta: 11427840 bytes 10.9 MiB, wrote 18.9% more than predicted freespace: trashing: was free: 90304512 bytes 86.1 MiB, need free: 25192640 bytes 24.0 MiB, files created: 248733, delete 179342 (72.1% of them) freespace: was free: 51187712 bytes 48.8 MiB, wrote: 62943232 bytes 60.0 MiB, delta: 11755520 bytes 11.2 MiB, wrote 23.0% more than predicted freespace: trashing: was free: 91209728 bytes 87.0 MiB, need free: 33590184 bytes 32.0 MiB, files created: 248779, delete 157160 (63.2% of them) freespace: was free: 42704896 bytes 40.7 MiB, wrote: 55050240 bytes 52.5 MiB, delta: 12345344 bytes 11.8 MiB, wrote 28.9% more than predicted freespace: trashing: was free: 92700672 bytes 88.4 MiB, need free: 41987728 bytes 40.0 MiB, files created: 248848, delete 136135 (54.7% of them) freespace: was free: 35250176 bytes 33.6 MiB, wrote: 48115712 bytes 45.9 MiB, delta: 12865536 bytes 12.3 MiB, wrote 36.5% more than predicted freespace: trashing: was free: 93986816 bytes 89.6 MiB, need free: 50385272 bytes 48.1 MiB, files created: 248723, delete 115385 (46.4% of them) freespace: was free: 29995008 bytes 28.6 MiB, wrote: 41582592 bytes 39.7 MiB, delta: 11587584 bytes 11.1 MiB, wrote 38.6% more than predicted freespace: trashing: was free: 91881472 bytes 87.6 MiB, need free: 58782816 bytes 56.1 MiB, files created: 248645, delete 89569 (36.0% of them) freespace: was free: 22511616 bytes 21.5 MiB, wrote: 34705408 bytes 33.1 MiB, delta: 12193792 bytes 11.6 MiB, wrote 54.2% more than predicted freespace: trashing: was free: 91774976 bytes 87.5 MiB, need free: 67180360 bytes 64.1 MiB, files created: 248580, delete 66616 (26.8% of them) freespace: was free: 16908288 bytes 16.1 MiB, wrote: 26898432 bytes 25.7 MiB, delta: 9990144 bytes 9.5 MiB, wrote 59.1% 
more than predicted freespace: trashing: was free: 92450816 bytes 88.2 MiB, need free: 75577904 bytes 72.1 MiB, files created: 248654, delete 45381 (18.3% of them) freespace: was free: 10170368 bytes 9.7 MiB, wrote: 19111936 bytes 18.2 MiB, delta: 8941568 bytes 8.5 MiB, wrote 87.9% more than predicted freespace: trashing: was free: 93282304 bytes 89.0 MiB, need free: 83975448 bytes 80.1 MiB, files created: 248513, delete 24794 (10.0% of them) freespace: was free: 3911680 bytes 3.7 MiB, wrote: 7872512 bytes 7.5 MiB, delta: 3960832 bytes 3.8 MiB, wrote 101.3% more than predicted freespace: Test 3 finished freespace: finished successfully Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
2008-08-25 18:58:19 +03:00
/*
 * Space accounting helpers. Declarations only; the definitions are not
 * visible in this header. Sizes are carried as 'long long'.
 * NOTE(review): the "_nolock" variant presumably expects the caller to hold
 * the relevant lock already - confirm against the definition.
 */
long long ubifs_get_free_space(struct ubifs_info *c);
long long ubifs_get_free_space_nolock(struct ubifs_info *c);
int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
void ubifs_convert_page_budget(struct ubifs_info *c);
long long ubifs_reported_space(const struct ubifs_info *c, long long free);
long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
/*
 * find.c
 *
 * Declarations only; the definitions are not visible in this header.
 * NOTE(review): several of these presumably return a LEB number on success
 * and a negative error code on failure - confirm against find.c.
 */
int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
int squeeze);
int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
/* The LEB properties found are handed back through @ret_lp */
int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
int min_space, int pick_free);
int ubifs_find_dirty_idx_leb(struct ubifs_info *c);
int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
/*
 * tnc.c
 *
 * Declarations only; the definitions are not visible in this header. Entries
 * are addressed by a 'union ubifs_key', node positions by @lnum and @offs.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative error code on failure - confirm against tnc.c.
 */
/* Look-ups */
int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
struct ubifs_znode **zn, int *n);
int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
void *node, const struct fscrypt_name *nm);
int ubifs_tnc_lookup_dh(struct ubifs_info *c, const union ubifs_key *key,
void *node, uint32_t secondary_hash);
int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
void *node, int *lnum, int *offs);
/* Additions and replacements */
int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
int offs, int len, const u8 *hash);
int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
int old_lnum, int old_offs, int lnum, int offs, int len);
int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
int lnum, int offs, int len, const u8 *hash,
const struct fscrypt_name *nm);
/* Removals */
int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key);
int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
const struct fscrypt_name *nm);
int ubifs_tnc_remove_dh(struct ubifs_info *c, const union ubifs_key *key,
uint32_t cookie);
int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
union ubifs_key *to_key);
int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum);
struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
union ubifs_key *key,
const struct fscrypt_name *nm);
void ubifs_tnc_close(struct ubifs_info *c);
int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
int lnum, int offs, int is_idx);
int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
int lnum, int offs);
/* Shared by tnc.c for tnc_commit.c */
void destroy_old_idx(struct ubifs_info *c);
int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
int lnum, int offs);
int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
/* Bulk-read helpers working on a 'struct bu_info' */
int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu);
int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu);
/*
 * tnc_misc.c
 *
 * Declarations only; the definitions are not visible in this header. Most of
 * these walk or load 'struct ubifs_znode' objects.
 * NOTE(review): how the pointer-returning functions signal "no more znodes"
 * or failure (NULL or an ERR_PTR() value) cannot be told from here - confirm
 * against tnc_misc.c.
 */
struct ubifs_znode *ubifs_tnc_levelorder_next(const struct ubifs_info *c,
struct ubifs_znode *zr,
struct ubifs_znode *znode);
/* The position found is handed back through @n */
int ubifs_search_zbranch(const struct ubifs_info *c,
const struct ubifs_znode *znode,
const union ubifs_key *key, int *n);
struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode);
struct ubifs_znode *ubifs_tnc_postorder_next(const struct ubifs_info *c,
struct ubifs_znode *znode);
long ubifs_destroy_tnc_subtree(const struct ubifs_info *c,
struct ubifs_znode *zr);
struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
struct ubifs_zbranch *zbr,
struct ubifs_znode *parent, int iip);
int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
void *node);
/*
 * tnc_commit.c
 *
 * Start and end hooks of the TNC commit. Declarations only.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure - confirm against tnc_commit.c.
 */
int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int ubifs_tnc_end_commit(struct ubifs_info *c);
/* shrinker.c */
/*
 * fs: convert fs shrinkers to new scan/count API
 *
 * Convert the filesystem shrinkers to use the new API, and standardise some
 * of the behaviours of the shrinkers at the same time. For example,
 * nr_to_scan means the number of objects to scan, not the number of objects
 * to free.
 *
 * I refactored the CIFS idmap shrinker a little - it really needs to be
 * broken up into a shrinker per tree and keep an item count with the tree
 * root so that we don't need to walk the tree every time the shrinker needs
 * to count the number of objects in the tree (i.e. all the time under
 * memory pressure).
 *
 * [glommer@openvz.org: fixes for ext4, ubifs, nfs, cifs and glock. Fixes
 * are needed mainly due to new code merged in the tree]
 * [assorted fixes folded in]
 *
 * Signed-off-by: Dave Chinner <dchinner@redhat.com>
 * Signed-off-by: Glauber Costa <glommer@openvz.org>
 * Acked-by: Mel Gorman <mgorman@suse.de>
 * Acked-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
 * Acked-by: Jan Kara <jack@suse.cz>
 * Acked-by: Steven Whitehouse <swhiteho@redhat.com>
 * Cc: Adrian Hunter <adrian.hunter@intel.com>
 * Cc: "Theodore Ts'o" <tytso@mit.edu>
 * Cc: Adrian Hunter <adrian.hunter@intel.com>
 * Cc: Al Viro <viro@zeniv.linux.org.uk>
 * Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
 * Cc: Arve Hjønnevåg <arve@android.com>
 * Cc: Carlos Maiolino <cmaiolino@redhat.com>
 * Cc: Christoph Hellwig <hch@lst.de>
 * Cc: Chuck Lever <chuck.lever@oracle.com>
 * Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
 * Cc: David Rientjes <rientjes@google.com>
 * Cc: Gleb Natapov <gleb@redhat.com>
 * Cc: Greg Thelen <gthelen@google.com>
 * Cc: J. Bruce Fields <bfields@redhat.com>
 * Cc: Jan Kara <jack@suse.cz>
 * Cc: Jerome Glisse <jglisse@redhat.com>
 * Cc: John Stultz <john.stultz@linaro.org>
 * Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
 * Cc: Kent Overstreet <koverstreet@google.com>
 * Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
 * Cc: Marcelo Tosatti <mtosatti@redhat.com>
 * Cc: Mel Gorman <mgorman@suse.de>
 * Cc: Steven Whitehouse <swhiteho@redhat.com>
 * Cc: Thomas Hellstrom <thellstrom@vmware.com>
 * Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
 * Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 * Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
 *
 * 2013-08-28 10:18:09 +10:00
 */
/*
 * Shrinker callbacks. Both take the shrinker and a 'struct shrink_control'
 * and return an unsigned long. Declarations only.
 * NOTE(review): presumably the scan callback reports how many objects were
 * freed and the count callback how many could be freed - confirm against
 * the shrinker API documentation.
 */
unsigned long ubifs_shrink_scan(struct shrinker *shrink,
struct shrink_control *sc);
unsigned long ubifs_shrink_count(struct shrinker *shrink,
struct shrink_control *sc);
/*
 * commit.c
 *
 * Declarations only; the definitions are not visible in this header.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative error code on failure - confirm against commit.c.
 */
/* NOTE(review): @info is presumably the 'struct ubifs_info *' - confirm */
int ubifs_bg_thread(void *info);
void ubifs_commit_required(struct ubifs_info *c);
void ubifs_request_bg_commit(struct ubifs_info *c);
int ubifs_run_commit(struct ubifs_info *c);
void ubifs_recovery_commit(struct ubifs_info *c);
int ubifs_gc_should_commit(struct ubifs_info *c);
void ubifs_wait_for_commit(struct ubifs_info *c);
/*
 * master.c
 *
 * Master node handling. Declarations only.
 * NOTE(review): read/write presumably return 0 on success and a negative
 * error code on failure; the meaning of the compare result cannot be told
 * from here - confirm against master.c.
 */
int ubifs_compare_master_node(struct ubifs_info *c, void *m1, void *m2);
int ubifs_read_master(struct ubifs_info *c);
int ubifs_write_master(struct ubifs_info *c);
/*
 * sb.c
 *
 * Superblock handling. Declarations only.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure - confirm against sb.c.
 */
int ubifs_read_superblock(struct ubifs_info *c);
int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
int ubifs_fixup_free_space(struct ubifs_info *c);
int ubifs_enable_encryption(struct ubifs_info *c);
/*
 * replay.c
 *
 * Journal replay. Declarations only.
 * NOTE(review): presumably 0 on success and a negative error code on
 * failure - confirm against replay.c.
 */
int ubifs_validate_entry(struct ubifs_info *c,
const struct ubifs_dent_node *dent);
int ubifs_replay_journal(struct ubifs_info *c);
/*
 * gc.c
 *
 * Garbage collection. Declarations only.
 * NOTE(review): the exact return conventions (error code versus LEB number)
 * cannot be told from here - confirm against gc.c.
 */
int ubifs_garbage_collect(struct ubifs_info *c, int anyway);
int ubifs_gc_start_commit(struct ubifs_info *c);
int ubifs_gc_end_commit(struct ubifs_info *c);
void ubifs_destroy_idx_gc(struct ubifs_info *c);
int ubifs_get_idx_gc_leb(struct ubifs_info *c);
/* Works on the single LEB described by @lp */
int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp);
/*
 * orphan.c
 *
 * Orphan handling; orphans are identified by inode number @inum.
 * Declarations only.
 * NOTE(review): the int-returning functions presumably return 0 on success
 * and a negative error code on failure - confirm against orphan.c.
 */
int ubifs_add_orphan(struct ubifs_info *c, ino_t inum);
void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum);
int ubifs_orphan_start_commit(struct ubifs_info *c);
int ubifs_orphan_end_commit(struct ubifs_info *c);
int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only);
int ubifs_clear_orphans(struct ubifs_info *c);
/*
 * lpt.c
 *
 * Declarations only; the definitions are not visible in this header. The
 * tree is made of 'struct ubifs_nnode' and 'struct ubifs_pnode' objects.
 * NOTE(review): how the pointer-returning functions report failure (NULL or
 * an ERR_PTR() value) cannot be told from here - confirm against lpt.c.
 */
int ubifs_calc_lpt_geom(struct ubifs_info *c);
int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
int *lpt_lebs, int *big_lpt, u8 *hash);
int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr);
/* Look-ups by LEB number @lnum */
struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum);
struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum);
/* Calls @scan_cb with @data; the LEB range is @start_lnum..@end_lnum */
int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
ubifs_lpt_scan_callback scan_cb, void *data);
/* Shared by lpt.c for lpt_commit.c */
void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave);
void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
struct ubifs_lpt_lprops *ltab);
void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
struct ubifs_pnode *pnode);
void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
struct ubifs_nnode *nnode);
struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
struct ubifs_nnode *parent, int iip);
struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
struct ubifs_nnode *parent, int iip);
struct ubifs_pnode *ubifs_pnode_lookup(struct ubifs_info *c, int i);
int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip);
void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
uint32_t ubifs_unpack_bits(const struct ubifs_info *c, uint8_t **addr, int *pos, int nrbits);
struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
/* Needed only in debugging code in lpt_commit.c */
int ubifs_unpack_nnode(const struct ubifs_info *c, void *buf,
struct ubifs_nnode *nnode);
/* NOTE(review): probably not covered by the "debugging only" remark above - confirm */
int ubifs_lpt_calc_hash(struct ubifs_info *c, u8 *hash);
/* lpt_commit.c */
/*
 * LPT routines exported by lpt_commit.c: the start/end/post commit entry
 * points, which return int, and ubifs_lpt_free(), which returns void.
 *
 * NOTE(review): @wr_only of ubifs_lpt_free() looks like a boolean flag passed
 * as int - confirm what it restricts in lpt_commit.c.
 */
int ubifs_lpt_start_commit(struct ubifs_info *c);
int ubifs_lpt_end_commit(struct ubifs_info *c);
int ubifs_lpt_post_commit(struct ubifs_info *c);
void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
/* lprops.c */
/*
 * lprops routines exported by lprops.c.
 *
 * ubifs_change_lp() and the ubifs_fast_find_*() helpers return
 * 'const struct ubifs_lprops *', so callers must not modify the returned
 * object through that pointer.
 * NOTE(review): how these functions signal failure (ERR_PTR() vs. NULL) is not
 * visible from the prototypes - confirm per function in lprops.c.
 */
const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
const struct ubifs_lprops *lp,
int free, int dirty, int flags,
int idx_gc_cnt);
void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *lst);
void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
int cat);
void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
struct ubifs_lprops *new_lprops);
void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops);
/* Takes both @c and @lprops through pointers to const */
int ubifs_categorize_lprops(const struct ubifs_info *c,
const struct ubifs_lprops *lprops);
/*
 * The two helpers below address a LEB by number (@lnum) instead of by
 * 'struct ubifs_lprops' pointer; ubifs_update_one_lp() has the same parameters
 * as ubifs_change_one_lp() minus @idx_gc_cnt.
 */
int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
int flags_set, int flags_clean, int idx_gc_cnt);
int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
int flags_set, int flags_clean);
int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp);
const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
/*
 * File and inode operations exported by file.c. Unlike most prototypes in
 * this header, these take VFS objects (file, dentry, inode) rather than a
 * 'struct ubifs_info'.
 *
 * ubifs_update_time() receives the time stamp as a 'struct timespec64'.
 */
int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
		  struct iattr *attr);
int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
umode_t mode);
int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat,
statx: Add a system call to make enhanced file info available Add a system call to make extended file information available, including file creation and some attribute flags where available through the underlying filesystem. The getattr inode operation is altered to take two additional arguments: a u32 request_mask and an unsigned int flags that indicate the synchronisation mode. This change is propagated to the vfs_getattr*() function. Functions like vfs_stat() are now inline wrappers around new functions vfs_statx() and vfs_statx_fd() to reduce stack usage. ======== OVERVIEW ======== The idea was initially proposed as a set of xattrs that could be retrieved with getxattr(), but the general preference proved to be for a new syscall with an extended stat structure. A number of requests were gathered for features to be included. The following have been included: (1) Make the fields a consistent size on all arches and make them large. (2) Spare space, request flags and information flags are provided for future expansion. (3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an __s64). (4) Creation time: The SMB protocol carries the creation time, which could be exported by Samba, which will in turn help CIFS make use of FS-Cache as that can be used for coherency data (stx_btime). This is also specified in NFSv4 as a recommended attribute and could be exported by NFSD [Steve French]. (5) Lightweight stat: Ask for just those details of interest, and allow a netfs (such as NFS) to approximate anything not of interest, possibly without going to the server [Trond Myklebust, Ulrich Drepper, Andreas Dilger] (AT_STATX_DONT_SYNC). (6) Heavyweight stat: Force a netfs to go to the server, even if it thinks its cached attributes are up to date [Trond Myklebust] (AT_STATX_FORCE_SYNC). And the following have been left out for future extension: (7) Data version number: Could be used by userspace NFS servers [Aneesh Kumar]. 
Can also be used to modify fill_post_wcc() in NFSD which retrieves i_version directly, but has just called vfs_getattr(). It could get it from the kstat struct if it used vfs_xgetattr() instead. (There's disagreement on the exact semantics of a single field, since not all filesystems do this the same way). (8) BSD stat compatibility: Including more fields from the BSD stat such as creation time (st_btime) and inode generation number (st_gen) [Jeremy Allison, Bernd Schubert]. (9) Inode generation number: Useful for FUSE and userspace NFS servers [Bernd Schubert]. (This was asked for but later deemed unnecessary with the open-by-handle capability available and caused disagreement as to whether it's a security hole or not). (10) Extra coherency data may be useful in making backups [Andreas Dilger]. (No particular data were offered, but things like last backup timestamp, the data version number and the DOS archive bit would come into this category). (11) Allow the filesystem to indicate what it can/cannot provide: A filesystem can now say it doesn't support a standard stat feature if that isn't available, so if, for instance, inode numbers or UIDs don't exist or are fabricated locally... (This requires a separate system call - I have an fsinfo() call idea for this). (12) Store a 16-byte volume ID in the superblock that can be returned in struct xstat [Steve French]. (Deferred to fsinfo). (13) Include granularity fields in the time data to indicate the granularity of each of the times (NFSv4 time_delta) [Steve French]. (Deferred to fsinfo). (14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags. Note that the Linux IOC flags are a mess and filesystems such as Ext4 define flags that aren't in linux/fs.h, so translation in the kernel may be a necessity (or, possibly, we provide the filesystem type too). 
(Some attributes are made available in stx_attributes, but the general feeling was that the IOC flags were to ext[234]-specific and shouldn't be exposed through statx this way). (15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer, Michael Kerrisk]. (Deferred, probably to fsinfo. Finding out if there's an ACL or seclabal might require extra filesystem operations). (16) Femtosecond-resolution timestamps [Dave Chinner]. (A __reserved field has been left in the statx_timestamp struct for this - if there proves to be a need). (17) A set multiple attributes syscall to go with this. =============== NEW SYSTEM CALL =============== The new system call is: int ret = statx(int dfd, const char *filename, unsigned int flags, unsigned int mask, struct statx *buffer); The dfd, filename and flags parameters indicate the file to query, in a similar way to fstatat(). There is no equivalent of lstat() as that can be emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is also no equivalent of fstat() as that can be emulated by passing a NULL filename to statx() with the fd of interest in dfd. Whether or not statx() synchronises the attributes with the backing store can be controlled by OR'ing a value into the flags argument (this typically only affects network filesystems): (1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this respect. (2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise its attributes with the server - which might require data writeback to occur to get the timestamps correct. (3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a network filesystem. The resulting values should be considered approximate. mask is a bitmask indicating the fields in struct statx that are of interest to the caller. The user should set this to STATX_BASIC_STATS to get the basic set returned by stat(). It should be noted that asking for more information may entail extra I/O operations. 
buffer points to the destination for the data. This must be 256 bytes in size. ====================== MAIN ATTRIBUTES RECORD ====================== The following structures are defined in which to return the main attribute set: struct statx_timestamp { __s64 tv_sec; __s32 tv_nsec; __s32 __reserved; }; struct statx { __u32 stx_mask; __u32 stx_blksize; __u64 stx_attributes; __u32 stx_nlink; __u32 stx_uid; __u32 stx_gid; __u16 stx_mode; __u16 __spare0[1]; __u64 stx_ino; __u64 stx_size; __u64 stx_blocks; __u64 __spare1[1]; struct statx_timestamp stx_atime; struct statx_timestamp stx_btime; struct statx_timestamp stx_ctime; struct statx_timestamp stx_mtime; __u32 stx_rdev_major; __u32 stx_rdev_minor; __u32 stx_dev_major; __u32 stx_dev_minor; __u64 __spare2[14]; }; The defined bits in request_mask and stx_mask are: STATX_TYPE Want/got stx_mode & S_IFMT STATX_MODE Want/got stx_mode & ~S_IFMT STATX_NLINK Want/got stx_nlink STATX_UID Want/got stx_uid STATX_GID Want/got stx_gid STATX_ATIME Want/got stx_atime{,_ns} STATX_MTIME Want/got stx_mtime{,_ns} STATX_CTIME Want/got stx_ctime{,_ns} STATX_INO Want/got stx_ino STATX_SIZE Want/got stx_size STATX_BLOCKS Want/got stx_blocks STATX_BASIC_STATS [The stuff in the normal stat struct] STATX_BTIME Want/got stx_btime{,_ns} STATX_ALL [All currently available stuff] stx_btime is the file creation time, stx_mask is a bitmask indicating the data provided and __spares*[] are where as-yet undefined fields can be placed. Time fields are structures with separate seconds and nanoseconds fields plus a reserved field in case we want to add even finer resolution. Note that times will be negative if before 1970; in such a case, the nanosecond fields will also be negative if not zero. The bits defined in the stx_attributes field convey information about a file, how it is accessed, where it is and what it does. 
The following attributes map to FS_*_FL flags and are the same numerical value: STATX_ATTR_COMPRESSED File is compressed by the fs STATX_ATTR_IMMUTABLE File is marked immutable STATX_ATTR_APPEND File is append-only STATX_ATTR_NODUMP File is not to be dumped STATX_ATTR_ENCRYPTED File requires key to decrypt in fs Within the kernel, the supported flags are listed by: KSTAT_ATTR_FS_IOC_FLAGS [Are any other IOC flags of sufficient general interest to be exposed through this interface?] New flags include: STATX_ATTR_AUTOMOUNT Object is an automount trigger These are for the use of GUI tools that might want to mark files specially, depending on what they are. Fields in struct statx come in a number of classes: (0) stx_dev_*, stx_blksize. These are local system information and are always available. (1) stx_mode, stx_nlinks, stx_uid, stx_gid, stx_[amc]time, stx_ino, stx_size, stx_blocks. These will be returned whether the caller asks for them or not. The corresponding bits in stx_mask will be set to indicate whether they actually have valid values. If the caller didn't ask for them, then they may be approximated. For example, NFS won't waste any time updating them from the server, unless as a byproduct of updating something requested. If the values don't actually exist for the underlying object (such as UID or GID on a DOS file), then the bit won't be set in the stx_mask, even if the caller asked for the value. In such a case, the returned value will be a fabrication. Note that there are instances where the type might not be valid, for instance Windows reparse points. (2) stx_rdev_*. This will be set only if stx_mode indicates we're looking at a blockdev or a chardev, otherwise will be 0. (3) stx_btime. Similar to (1), except this will be set to 0 if it doesn't exist. ======= TESTING ======= The following test program can be used to test the statx system call: samples/statx/test-statx.c Just compile and run, passing it paths to the files you want to examine. 
The file is built automatically if CONFIG_SAMPLES is enabled. Here's some example output. Firstly, an NFS directory that crosses to another FSID. Note that the AUTOMOUNT attribute is set because transiting this directory will cause d_automount to be invoked by the VFS. [root@andromeda ~]# /tmp/test-statx -A /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:26 Inode: 1703937 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------) Secondly, the result of automounting on that directory. [root@andromeda ~]# /tmp/test-statx /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:27 Inode: 2 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-01-31 16:46:22 +00:00
u32 request_mask, unsigned int flags);
int ubifs_check_dir_empty(struct inode *dir);
/* xattr.c */
/*
 * Extended attribute accessors exported by xattr.c. These two prototypes are
 * declared unconditionally, i.e. outside of the CONFIG_UBIFS_FS_XATTR
 * conditional. Both identify the attribute by @name on the @host inode;
 * ubifs_xattr_set() takes the value through a pointer to const, while
 * ubifs_xattr_get() takes a writable buffer of @size bytes.
 */
int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
size_t size, int flags, bool check_lock);
ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
size_t size);
/*
 * Extended attribute entry points that depend on CONFIG_UBIFS_FS_XATTR.
 *
 * Without that option the handler table and the listxattr operation are
 * replaced by NULL, ubifs_evict_xattr_inode() does nothing and
 * ubifs_purge_xattrs() reports success (0) without doing any work.
 */
#ifndef CONFIG_UBIFS_FS_XATTR
#define ubifs_xattr_handlers NULL
#define ubifs_listxattr NULL

static inline void ubifs_evict_xattr_inode(struct ubifs_info *c,
					   ino_t xattr_inum)
{
}

static inline int ubifs_purge_xattrs(struct inode *host)
{
	return 0;
}
#else /* CONFIG_UBIFS_FS_XATTR */
extern const struct xattr_handler *ubifs_xattr_handlers[];
ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
int ubifs_purge_xattrs(struct inode *host);
#endif /* !CONFIG_UBIFS_FS_XATTR */
/*
 * ubifs_init_security() is only implemented when CONFIG_UBIFS_FS_SECURITY is
 * set; otherwise the inline stub below reports success (0) without doing
 * anything.
 *
 * NOTE(review): the first parameter is named @dentry although its type is
 * 'struct inode *' - presumably it is the parent directory inode; confirm
 * against the implementation before renaming.
 */
#ifndef CONFIG_UBIFS_FS_SECURITY
static inline int ubifs_init_security(struct inode *dentry,
				      struct inode *inode,
				      const struct qstr *qstr)
{
	return 0;
}
#else /* CONFIG_UBIFS_FS_SECURITY */
int ubifs_init_security(struct inode *dentry, struct inode *inode,
			const struct qstr *qstr);
#endif /* !CONFIG_UBIFS_FS_SECURITY */
/* super.c */
/*
 * Exported by super.c: takes the VFS super block @sb and an inode number
 * @inum and returns a 'struct inode *'.
 * NOTE(review): presumably failure is reported via ERR_PTR() rather than
 * NULL - confirm in super.c.
 */
struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
/* recovery.c */
/*
 * Recovery routines exported by recovery.c.
 *
 * ubifs_recover_leb() and ubifs_recover_log_leb() return a
 * 'struct ubifs_scan_leb *' for the LEB @lnum starting at offset @offs; the
 * former takes one extra argument, @jhead. Several routines share an untyped
 * scratch buffer argument, @sbuf.
 * NOTE(review): the required size of @sbuf is not visible here - confirm in
 * recovery.c.
 */
int ubifs_recover_master_node(struct ubifs_info *c);
int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf, int jhead);
struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf);
int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf);
int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
int ubifs_rcvry_gc_commit(struct ubifs_info *c);
int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
int deletion, loff_t new_size);
int ubifs_recover_size(struct ubifs_info *c, bool in_place);
void ubifs_destroy_size_tree(struct ubifs_info *c);
/* ioctl.c */
/*
 * ioctl and file attribute routines exported by ioctl.c. They operate on VFS
 * objects (dentry, file, inode) rather than on a 'struct ubifs_info'.
 */
int ubifs_fileattr_get(struct dentry *dentry, struct fileattr *fa);
int ubifs_fileattr_set(struct user_namespace *mnt_userns,
struct dentry *dentry, struct fileattr *fa);
long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
void ubifs_set_inode_flags(struct inode *inode);
/* The compat entry point is declared only when CONFIG_COMPAT is defined */
#ifdef CONFIG_COMPAT
long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#endif
/* compressor.c */
/*
 * Compression routines exported by compressor.c.
 *
 * ubifs_compressors_init() and ubifs_compressors_exit() take no arguments.
 *
 * ubifs_compress() returns void and takes @out_len and @compr_type by
 * pointer, whereas ubifs_decompress() returns int and takes @compr_type by
 * value. Both take @c and the input buffer through pointers to const.
 * NOTE(review): presumably ubifs_compress() stores the compression type it
 * actually used in *@compr_type - confirm in compressor.c.
 */
int __init ubifs_compressors_init(void);
void ubifs_compressors_exit(void);
void ubifs_compress(const struct ubifs_info *c, const void *in_buf, int in_len,
		    void *out_buf, int *out_len, int *compr_type);
int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
		     void *out, int *out_len, int compr_type);
/* sysfs.c */
/*
 * sysfs routines exported by sysfs.c: ubifs_sysfs_init() and
 * ubifs_sysfs_exit() take no arguments, while ubifs_sysfs_register() and
 * ubifs_sysfs_unregister() act on one file-system description object @c.
 */
int ubifs_sysfs_init(void);
void ubifs_sysfs_exit(void);
int ubifs_sysfs_register(struct ubifs_info *c);
void ubifs_sysfs_unregister(struct ubifs_info *c);
#include "debug.h"
#include "misc.h"
#include "key.h"
/*
 * Data node encryption hooks. When CONFIG_FS_ENCRYPTION is not defined the
 * two inline stubs below are used: each one invokes ubifs_assert() with a
 * constant-false condition and then returns -EOPNOTSUPP. Otherwise the
 * functions are only declared here.
 */
#ifndef CONFIG_FS_ENCRYPTION
static inline int ubifs_encrypt(const struct inode *inode,
struct ubifs_data_node *dn,
unsigned int in_len, unsigned int *out_len,
int block)
{
/* @c is looked up only so that it can be handed to ubifs_assert() */
struct ubifs_info *c = inode->i_sb->s_fs_info;
/* The asserted condition is always false: this stub should not be reached */
ubifs_assert(c, 0);
return -EOPNOTSUPP;
}
static inline int ubifs_decrypt(const struct inode *inode,
struct ubifs_data_node *dn,
unsigned int *out_len, int block)
{
/* @c is looked up only so that it can be handed to ubifs_assert() */
struct ubifs_info *c = inode->i_sb->s_fs_info;
/* The asserted condition is always false: this stub should not be reached */
ubifs_assert(c, 0);
return -EOPNOTSUPP;
}
#else
/* crypto.c */
int ubifs_encrypt(const struct inode *inode, struct ubifs_data_node *dn,
unsigned int in_len, unsigned int *out_len, int block);
int ubifs_decrypt(const struct inode *inode, struct ubifs_data_node *dn,
unsigned int *out_len, int block);
#endif
/* Declared unconditionally, i.e. regardless of CONFIG_FS_ENCRYPTION */
extern const struct fscrypt_operations ubifs_crypt_operations;
/* Normal UBIFS messages */
/*
 * All three message functions take the file-system description object @c
 * followed by a printf-style format. __printf(2, 3) tells the compiler that
 * argument 2 is the format string and that the values to format start at
 * argument 3, so that format/argument mismatches are diagnosed at build time.
 */
__printf(2, 3)
void ubifs_msg(const struct ubifs_info *c, const char *fmt, ...);
__printf(2, 3)
void ubifs_err(const struct ubifs_info *c, const char *fmt, ...);
__printf(2, 3)
void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...);
/*
 * A conditional variant of 'ubifs_err()' which doesn't output anything
 * if probing (ie. SB_SILENT set).
 *
 * @c is evaluated exactly once, so passing an expression with side effects
 * is safe. @fmt and the remaining arguments are forwarded to 'ubifs_err()'
 * unchanged and are only evaluated when the message is actually printed.
 */
#define ubifs_errc(c, fmt, ...)						\
do {									\
	const struct ubifs_info *__ubifs_errc_c = (c);			\
									\
	if (!__ubifs_errc_c->probing)					\
		ubifs_err(__ubifs_errc_c, fmt, ##__VA_ARGS__);		\
} while (0)
#endif /* !__UBIFS_H__ */