e30b5dca15
We (Linux Kernel Performance project) found a regression introduced by commit f7fec032aa ("ext4: track all extent status in extent status tree"). The commit causes about a 20% performance decrease in the fio random write test. The profiler shows that rb_next() uses a lot of CPU time. The call stack is: rb_next ext4_es_find_delayed_extent ext4_map_blocks _ext4_get_block ext4_get_block_write __blockdev_direct_IO ext4_direct_IO generic_file_direct_write __generic_file_aio_write ext4_file_write aio_rw_vect_retry aio_run_iocb do_io_submit sys_io_submit system_call_fastpath io_submit td_io_getevents io_u_queued_complete thread_main main __libc_start_main. The cause is that ext4_es_find_delayed_extent() doesn't have an upper bound; it keeps searching until a delayed extent is found. When there are a lot of non-delayed entries in the extent status tree, ext4_es_find_delayed_extent() may use a lot of CPU time. Reported-by: LKP project <lkp@linux.intel.com> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> Signed-off-by: Zheng Liu <wenqing.lz@taobao.com> Cc: "Theodore Ts'o" <tytso@mit.edu>
128 lines
3.4 KiB
C
128 lines
3.4 KiB
C
/*
|
|
* fs/ext4/extents_status.h
|
|
*
|
|
* Written by Yongqiang Yang <xiaoqiangnk@gmail.com>
|
|
* Modified by
|
|
* Allison Henderson <achender@linux.vnet.ibm.com>
|
|
* Zheng Liu <wenqing.lz@taobao.com>
|
|
*
|
|
*/
|
|
|
|
#ifndef _EXT4_EXTENTS_STATUS_H
|
|
#define _EXT4_EXTENTS_STATUS_H
|
|
|
|
/*
 * Turn on ES_DEBUG__ to get lots of info about extent status operations.
 *
 * With ES_DEBUG__ defined, es_debug() forwards its format string and
 * arguments to printk().  Without it, the same arguments are handed to
 * no_printk() instead (defined outside this header; presumably a
 * format-checked no-op -- confirm against its definition).
 */
#ifdef ES_DEBUG__
#define es_debug(fmt, ...)	printk(fmt, ##__VA_ARGS__)
#else
#define es_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif
|
|
|
|
/*
 * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
 * checked with old map_block's result.
 *
 * NOTE(review): the macro defined below is ES_AGGRESSIVE_TEST__ (with a
 * trailing "__"), not the ES_AGGRESSIVE_TEST named above.  Presumably the
 * suffix keeps the check switched off and is removed to enable it --
 * confirm against the #ifdef used by the code that includes this header.
 */
#define ES_AGGRESSIVE_TEST__
|
|
|
|
/*
 * These flags live in the high bits of extent_status.es_pblk
 * (bits 63..60, one bit per status).  EXTENT_STATUS_FLAGS is the mask
 * covering all four; the bits below it are what remains for the
 * physical block number.
 */
#define EXTENT_STATUS_WRITTEN	(1ULL << 63)
#define EXTENT_STATUS_UNWRITTEN	(1ULL << 62)
#define EXTENT_STATUS_DELAYED	(1ULL << 61)
#define EXTENT_STATUS_HOLE	(1ULL << 60)

#define EXTENT_STATUS_FLAGS	(EXTENT_STATUS_WRITTEN | \
				 EXTENT_STATUS_UNWRITTEN | \
				 EXTENT_STATUS_DELAYED | \
				 EXTENT_STATUS_HOLE)
|
|
|
|
struct ext4_extent;
|
|
|
|
struct extent_status {
|
|
struct rb_node rb_node;
|
|
ext4_lblk_t es_lblk; /* first logical block extent covers */
|
|
ext4_lblk_t es_len; /* length of extent in block */
|
|
ext4_fsblk_t es_pblk; /* first physical block */
|
|
};
|
|
|
|
struct ext4_es_tree {
|
|
struct rb_root root;
|
|
struct extent_status *cache_es; /* recently accessed extent */
|
|
};
|
|
|
|
/*
 * Set-up and tear-down entry points; the definitions live outside this
 * header.  Judging by the names, ext4_init_es()/ext4_exit_es() are the
 * global init/exit pair and ext4_es_init_tree() prepares one tree for
 * use -- confirm against the implementation.
 */
extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
|
|
|
|
/*
 * Per-inode extent status operations; the definitions live outside this
 * header, so return-value conventions must be checked there.
 *
 * @lblk/@len describe a range of logical blocks, @pblk a physical block
 * and @status a combination of EXTENT_STATUS_* flags.
 *
 * ext4_es_find_delayed_extent_range() takes an explicit @end so that the
 * search for a delayed extent has an upper limit; an unbounded search
 * could spend a lot of CPU time walking non-delayed entries.  The result
 * is reported through @es (presumably; whether @end is inclusive is not
 * visible here -- confirm against the definition).
 */
extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len, ext4_fsblk_t pblk,
				 unsigned long long status);
extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
				 ext4_lblk_t len);
extern void ext4_es_find_delayed_extent_range(struct inode *inode,
					ext4_lblk_t lblk, ext4_lblk_t end,
					struct extent_status *es);
extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
				 struct extent_status *es);
extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
|
|
|
|
static inline int ext4_es_is_written(struct extent_status *es)
|
|
{
|
|
return (es->es_pblk & EXTENT_STATUS_WRITTEN) != 0;
|
|
}
|
|
|
|
static inline int ext4_es_is_unwritten(struct extent_status *es)
|
|
{
|
|
return (es->es_pblk & EXTENT_STATUS_UNWRITTEN) != 0;
|
|
}
|
|
|
|
static inline int ext4_es_is_delayed(struct extent_status *es)
|
|
{
|
|
return (es->es_pblk & EXTENT_STATUS_DELAYED) != 0;
|
|
}
|
|
|
|
static inline int ext4_es_is_hole(struct extent_status *es)
|
|
{
|
|
return (es->es_pblk & EXTENT_STATUS_HOLE) != 0;
|
|
}
|
|
|
|
static inline ext4_fsblk_t ext4_es_status(struct extent_status *es)
|
|
{
|
|
return (es->es_pblk & EXTENT_STATUS_FLAGS);
|
|
}
|
|
|
|
static inline ext4_fsblk_t ext4_es_pblock(struct extent_status *es)
|
|
{
|
|
return (es->es_pblk & ~EXTENT_STATUS_FLAGS);
|
|
}
|
|
|
|
static inline void ext4_es_store_pblock(struct extent_status *es,
|
|
ext4_fsblk_t pb)
|
|
{
|
|
ext4_fsblk_t block;
|
|
|
|
block = (pb & ~EXTENT_STATUS_FLAGS) |
|
|
(es->es_pblk & EXTENT_STATUS_FLAGS);
|
|
es->es_pblk = block;
|
|
}
|
|
|
|
static inline void ext4_es_store_status(struct extent_status *es,
|
|
unsigned long long status)
|
|
{
|
|
ext4_fsblk_t block;
|
|
|
|
block = (status & EXTENT_STATUS_FLAGS) |
|
|
(es->es_pblk & ~EXTENT_STATUS_FLAGS);
|
|
es->es_pblk = block;
|
|
}
|
|
|
|
/*
 * Shrinker and LRU hooks; the definitions live outside this header.
 * Judging by the names, the first pair registers/unregisters a shrinker
 * for a super block and the second adds/removes an inode to/from an LRU
 * list -- confirm against the implementation.
 */
extern void ext4_es_register_shrinker(struct super_block *sb);
extern void ext4_es_unregister_shrinker(struct super_block *sb);
extern void ext4_es_lru_add(struct inode *inode);
extern void ext4_es_lru_del(struct inode *inode);
|
|
|
|
#endif /* _EXT4_EXTENTS_STATUS_H */
|