linux/fs/ext4/fast_commit.h

187 lines
4.2 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __FAST_COMMIT_H__
#define __FAST_COMMIT_H__
/*
* Note this file is present in e2fsprogs/lib/ext2fs/fast_commit.h and
* linux/fs/ext4/fast_commit.h. These file should always be byte identical.
*/
/* Fast commit tags */
#define EXT4_FC_TAG_ADD_RANGE 0x0001
#define EXT4_FC_TAG_DEL_RANGE 0x0002
#define EXT4_FC_TAG_CREAT 0x0003
#define EXT4_FC_TAG_LINK 0x0004
#define EXT4_FC_TAG_UNLINK 0x0005
#define EXT4_FC_TAG_INODE 0x0006
#define EXT4_FC_TAG_PAD 0x0007
#define EXT4_FC_TAG_TAIL 0x0008
#define EXT4_FC_TAG_HEAD 0x0009
#define EXT4_FC_SUPPORTED_FEATURES 0x0
/* On disk fast commit tlv value structures */
/* Fast commit on disk tag length structure */
struct ext4_fc_tl {
__le16 fc_tag;
__le16 fc_len;
};
/* Value structure for tag EXT4_FC_TAG_HEAD. */
struct ext4_fc_head {
__le32 fc_features;
__le32 fc_tid;
};
/* Value structure for EXT4_FC_TAG_ADD_RANGE. */
struct ext4_fc_add_range {
__le32 fc_ino;
__u8 fc_ex[12];
};
/* Value structure for tag EXT4_FC_TAG_DEL_RANGE. */
struct ext4_fc_del_range {
__le32 fc_ino;
__le32 fc_lblk;
__le32 fc_len;
};
/*
* This is the value structure for tags EXT4_FC_TAG_CREAT, EXT4_FC_TAG_LINK
* and EXT4_FC_TAG_UNLINK.
*/
struct ext4_fc_dentry_info {
__le32 fc_parent_ino;
__le32 fc_ino;
__u8 fc_dname[];
};
/* Value structure for EXT4_FC_TAG_INODE and EXT4_FC_TAG_INODE_PARTIAL. */
struct ext4_fc_inode {
__le32 fc_ino;
__u8 fc_raw_inode[];
};
/* Value structure for tag EXT4_FC_TAG_TAIL. */
struct ext4_fc_tail {
__le32 fc_tid;
__le32 fc_crc;
};
/* Tag base length */
#define EXT4_FC_TAG_BASE_LEN (sizeof(struct ext4_fc_tl))
/*
* Fast commit status codes
*/
enum {
EXT4_FC_STATUS_OK = 0,
EXT4_FC_STATUS_INELIGIBLE,
EXT4_FC_STATUS_SKIPPED,
EXT4_FC_STATUS_FAILED,
};
/*
* Fast commit ineligiblity reasons:
*/
enum {
EXT4_FC_REASON_XATTR = 0,
EXT4_FC_REASON_CROSS_RENAME,
EXT4_FC_REASON_JOURNAL_FLAG_CHANGE,
EXT4_FC_REASON_NOMEM,
EXT4_FC_REASON_SWAP_BOOT,
EXT4_FC_REASON_RESIZE,
EXT4_FC_REASON_RENAME_DIR,
EXT4_FC_REASON_FALLOC_RANGE,
EXT4_FC_REASON_INODE_JOURNAL_DATA,
EXT4_FC_REASON_MAX
};
#ifdef __KERNEL__
/*
* In memory list of dentry updates that are performed on the file
* system used by fast commit code.
*/
struct ext4_fc_dentry_update {
int fcd_op; /* Type of update create / unlink / link */
int fcd_parent; /* Parent inode number */
int fcd_ino; /* Inode number */
struct qstr fcd_name; /* Dirent name */
unsigned char fcd_iname[DNAME_INLINE_LEN]; /* Dirent name string */
struct list_head fcd_list;
ext4: improve fast_commit performance and scalability Currently ext4_fc_commit_dentry_updates() is of quadratic time complexity, which is causing performance bottlenecks with high threads/file/dir count with fs_mark. This patch makes commit dentry updates (and hence ext4_fc_commit()) path to linear time complexity. Hence improves the performance of workloads which does fsync on multiple threads/open files one-by-one. Absolute numbers in avg file creates per sec (from fs_mark in 1K order) ======================================================================= no. Order without-patch(K) with-patch(K) Diff(%) 1 1 16.90 17.51 +3.60 2 2,2 32.08 31.80 -0.87 3 3,3 53.97 55.01 +1.92 4 4,4 78.94 76.90 -2.58 5 5,5 95.82 95.37 -0.46 6 6,6 87.92 103.38 +17.58 7 6,10 0.73 126.13 +17178.08 8 6,14 2.33 143.19 +6045.49 workload type ============== For e.g. 7th row order of 6,10 (2^6 == 64 && 2^10 == 1024) echo /run/riteshh/mnt/{1..64} |sed -E 's/[[:space:]]+/ -d /g' \ | xargs -I {} bash -c "sudo fs_mark -L 100 -D 1024 -n 1024 -s0 -S5 -d {}" Perf profile (w/o patches) ============================= 87.15% [kernel] [k] ext4_fc_commit --> Heavy contention/bottleneck 1.98% [kernel] [k] perf_event_interrupt 0.96% [kernel] [k] power_pmu_enable 0.91% [kernel] [k] update_sd_lb_stats.constprop.0 0.67% [kernel] [k] ktime_get Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com> Reviewed-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com> Link: https://lore.kernel.org/r/930f35d4fd5f83e2673c868781d9ebf15e91bf4e.1645426817.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o <tytso@mit.edu>
2022-02-21 13:26:15 +05:30
struct list_head fcd_dilist;
};
struct ext4_fc_stats {
unsigned int fc_ineligible_reason_count[EXT4_FC_REASON_MAX];
unsigned long fc_num_commits;
unsigned long fc_ineligible_commits;
unsigned long fc_failed_commits;
unsigned long fc_skipped_commits;
unsigned long fc_numblks;
u64 s_fc_avg_commit_time;
};
#define EXT4_FC_REPLAY_REALLOC_INCREMENT 4
/*
* Physical block regions added to different inodes due to fast commit
* recovery. These are set during the SCAN phase. During the replay phase,
* our allocator excludes these from its allocation. This ensures that
* we don't accidentally allocating a block that is going to be used by
* another inode.
*/
struct ext4_fc_alloc_region {
ext4_lblk_t lblk;
ext4_fsblk_t pblk;
int ino, len;
};
/*
* Fast commit replay state.
*/
struct ext4_fc_replay_state {
int fc_replay_num_tags;
int fc_replay_expected_off;
int fc_current_pass;
int fc_cur_tag;
int fc_crc;
struct ext4_fc_alloc_region *fc_regions;
int fc_regions_size, fc_regions_used, fc_regions_valid;
int *fc_modified_inodes;
int fc_modified_inodes_used, fc_modified_inodes_size;
};
#define region_last(__region) (((__region)->lblk) + ((__region)->len) - 1)
#endif
static inline const char *tag2str(__u16 tag)
{
switch (tag) {
case EXT4_FC_TAG_LINK:
return "ADD_ENTRY";
case EXT4_FC_TAG_UNLINK:
return "DEL_ENTRY";
case EXT4_FC_TAG_ADD_RANGE:
return "ADD_RANGE";
case EXT4_FC_TAG_CREAT:
return "CREAT_DENTRY";
case EXT4_FC_TAG_DEL_RANGE:
return "DEL_RANGE";
case EXT4_FC_TAG_INODE:
return "INODE";
case EXT4_FC_TAG_PAD:
return "PAD";
case EXT4_FC_TAG_TAIL:
return "TAIL";
case EXT4_FC_TAG_HEAD:
return "HEAD";
default:
return "ERROR";
}
}
#endif /* __FAST_COMMIT_H__ */