From 0c734c5ea76e333fbb8dd83b5bab46291b38096b Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 14 Dec 2023 11:08:15 -0700
Subject: [PATCH] block: improve struct request_queue layout

It's clearly been a while since someone looked at this, so I gave it a
quick shot. There are a few issues in here:

- Random bundling of members that are mostly read-only and often written
- Random holes that need not be there

This moves the most frequently used bits into cachelines 1 and 2, with
the 2nd one being more write intensive than the first one, which is
basically read-only.

Outside of making this work a bit more efficiently, it also reduces the
size of struct request_queue for my test setup from 864 bytes (spanning
14 cachelines!) to 832 bytes and 13 cachelines.

Reviewed-by: Christoph Hellwig
Link: https://lore.kernel.org/r/d2b7b61c-4868-45c0-9060-4f9c73de9d7e@kernel.dk
Signed-off-by: Jens Axboe
---
 include/linux/blkdev.h | 95 ++++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 46 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 17c0a7d0d319..185ed3770e3a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -367,59 +367,51 @@ struct blk_independent_access_ranges {
 };
 
 struct request_queue {
-	struct request		*last_merge;
-	struct elevator_queue	*elevator;
-
-	struct percpu_ref	q_usage_counter;
-
-	struct blk_queue_stats	*stats;
-	struct rq_qos		*rq_qos;
-	struct mutex		rq_qos_mutex;
-
-	const struct blk_mq_ops	*mq_ops;
-
-	/* sw queues */
-	struct blk_mq_ctx __percpu	*queue_ctx;
-
-	unsigned int		queue_depth;
-
-	/* hw dispatch queues */
-	struct xarray		hctx_table;
-	unsigned int		nr_hw_queues;
-
 	/*
 	 * The queue owner gets to use this for whatever they like.
 	 * ll_rw_blk doesn't touch it.
 	 */
 	void			*queuedata;
 
+	struct elevator_queue	*elevator;
+
+	const struct blk_mq_ops	*mq_ops;
+
+	/* sw queues */
+	struct blk_mq_ctx __percpu	*queue_ctx;
+
 	/*
 	 * various queue flags, see QUEUE_* below
 	 */
 	unsigned long		queue_flags;
-	/*
-	 * Number of contexts that have called blk_set_pm_only(). If this
-	 * counter is above zero then only RQF_PM requests are processed.
-	 */
-	atomic_t		pm_only;
 
-	/*
-	 * ida allocated id for this queue. Used to index queues from
-	 * ioctx.
-	 */
-	int			id;
+	unsigned int		rq_timeout;
+
+	unsigned int		queue_depth;
+
+	refcount_t		refs;
+
+	/* hw dispatch queues */
+	unsigned int		nr_hw_queues;
+	struct xarray		hctx_table;
+
+	struct percpu_ref	q_usage_counter;
+
+	struct request		*last_merge;
 
 	spinlock_t		queue_lock;
 
-	struct gendisk		*disk;
+	int			quiesce_depth;
 
-	refcount_t		refs;
+	struct gendisk		*disk;
 
 	/*
 	 * mq queue kobject
 	 */
 	struct kobject *mq_kobj;
 
+	struct queue_limits	limits;
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 	struct blk_integrity integrity;
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
@@ -429,25 +421,41 @@ struct request_queue {
 	enum rpm_status		rpm_status;
 #endif
 
+	/*
+	 * Number of contexts that have called blk_set_pm_only(). If this
+	 * counter is above zero then only RQF_PM requests are processed.
+	 */
+	atomic_t		pm_only;
+
+	struct blk_queue_stats	*stats;
+	struct rq_qos		*rq_qos;
+	struct mutex		rq_qos_mutex;
+
+	/*
+	 * ida allocated id for this queue. Used to index queues from
+	 * ioctx.
+	 */
+	int			id;
+
+	unsigned int		dma_pad_mask;
+
 	/*
 	 * queue settings
 	 */
 	unsigned long		nr_requests;	/* Max # of requests */
 
-	unsigned int		dma_pad_mask;
-
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 	struct blk_crypto_profile *crypto_profile;
 	struct kobject *crypto_kobject;
 #endif
 
-	unsigned int		rq_timeout;
-
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
 
 	atomic_t		nr_active_requests_shared_tags;
 
+	unsigned int		required_elevator_features;
+
 	struct blk_mq_tags	*sched_shared_tags;
 
 	struct list_head	icq_list;
@@ -458,11 +466,12 @@ struct request_queue {
 	struct mutex		blkcg_mutex;
 #endif
 
-	struct queue_limits	limits;
-
-	unsigned int		required_elevator_features;
-
 	int			node;
+
+	spinlock_t		requeue_lock;
+	struct list_head	requeue_list;
+	struct delayed_work	requeue_work;
+
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	struct blk_trace __rcu	*blk_trace;
 #endif
@@ -472,10 +481,6 @@ struct request_queue {
 	struct blk_flush_queue	*fq;
 	struct list_head	flush_list;
 
-	struct list_head	requeue_list;
-	spinlock_t		requeue_lock;
-	struct delayed_work	requeue_work;
-
 	struct mutex		sysfs_lock;
 	struct mutex		sysfs_dir_lock;
 
@@ -500,8 +505,6 @@ struct request_queue {
 	 */
 	struct mutex		mq_freeze_lock;
 
-	int			quiesce_depth;
-
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
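An editorial aside for readers: the win here comes from ordering members by
alignment and access pattern, so the compiler inserts no padding holes and the
hot, read-mostly fields land together in the first cachelines. A minimal
userspace sketch of the same idea follows; the struct and field names are made
up for illustration (this is not the kernel's request_queue), and the sizes in
the comments assume x86-64.

#include <stdio.h>
#include <stddef.h>

/*
 * Badly ordered: each 1-byte flag forces a 7-byte hole before the
 * following 8-byte-aligned member, wasting space and spreading the
 * frequently accessed fields further apart.
 */
struct bad_layout {
	char		flag_a;		/* 1 byte + 7-byte hole */
	unsigned long	hot_counter;	/* needs 8-byte alignment */
	char		flag_b;		/* 1 byte + 7-byte hole */
	void		*hot_ptr;
};

/*
 * Reordered: 8-byte members first, small members bundled at the end,
 * so the only padding is the tail pad of the struct itself.
 */
struct good_layout {
	unsigned long	hot_counter;
	void		*hot_ptr;
	char		flag_a;
	char		flag_b;
};

int main(void)
{
	printf("bad:  %zu bytes\n", sizeof(struct bad_layout));	/* 32 */
	printf("good: %zu bytes\n", sizeof(struct good_layout));	/* 24 */
	printf("hot_counter offset: bad %zu, good %zu\n",
	       offsetof(struct bad_layout, hot_counter),
	       offsetof(struct good_layout, hot_counter));
	return 0;
}

For the real struct, running pahole -C request_queue vmlinux on a built kernel
prints the per-member offsets, padding holes, and cacheline boundaries that the
commit message's 864-byte/14-cacheline and 832-byte/13-cacheline figures refer to.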